/** * Copyright 2010 Google Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.waveprotocol.wave.model.id; import org.waveprotocol.wave.model.util.Utf16Util; import org.waveprotocol.wave.model.util.Utf16Util.CodePointHandler; import java.util.Arrays; /** * Utilities for working with identifiers compliant with the new specification. * * @author anorth@google.com (Alex North) * @see "http://code.google.com/p/wave-protocol/source/browse/spec/waveid/waveidspec.rst" */ public final class WaveIdentifiers { /** * Boolean array defining ASCII chars allowed in an identifier. * Entries correspond to character values. */ private static final boolean[] SAFE_ASCII_CHARS; static { SAFE_ASCII_CHARS = new boolean[0x7F]; for (char c = 'A'; c <= 'Z'; ++c) { SAFE_ASCII_CHARS[c] = true; } for (char c = 'a'; c <= 'z'; ++c) { SAFE_ASCII_CHARS[c] = true; } for (char c = '0'; c <= '9'; ++c) { SAFE_ASCII_CHARS[c] = true; } for (char c : Arrays.asList('-', '.', '_', '~', '+', '*', '@')) { SAFE_ASCII_CHARS[c] = true; } } private static final CodePointHandler<Boolean> GOOD_UTF16_FOR_ID = new CodePointHandler<Boolean>() { @Override public Boolean codePoint(int cp) { if (!Utf16Util.isCodePointValid(cp)) { return false; } if (cp < SAFE_ASCII_CHARS.length && !SAFE_ASCII_CHARS[cp]) { return false; } if (cp >= SAFE_ASCII_CHARS.length && !isUcsChar(cp)) { return false; } return null; } @Override public Boolean endOfString() { return true; } @Override public Boolean unpairedSurrogate(char c) { return false; } }; /** * Checks whether a UTF-16 string is a valid wave identifier. */ public static boolean isValidIdentifier(String id) { return !id.isEmpty() && Utf16Util.traverseUtf16String(id, GOOD_UTF16_FOR_ID); } /** * Checks if the given string has a valid host name specified, starting at the * given start index. This method implements a check for a valid domain as * specified by RFC 1035, Section 2.3.1. It essentially checks if the domain * matches the following regular expression: * <tt>[a-z0-9]([a-z0-9\-]*[a-z0-9])(\.[a-z0-9]([a-z0-9\-]*[a-z0-9]))*</tt>. * Please note that the specification does not restrict TLDs, and therefore * my.arbitrary.domain passes the check. We also allow labels to start with * a digit to allow for domains such as 76.com. Furthermore, we allow only * strings specified by the subdomain non-terminal,to avoid allowing empty * string, which can be derived from the domain non-terminal. */ public static boolean isValidDomain(int start, String x) { // TODO(user): Make sure we accept only valid TLDs. int index = start; int length = x.length() - start; if (length > 253 || length < 1) { return false; } while (index < x.length()) { char c = x.charAt(index); // A label must being with a letter or a digit. if (('a' > c || c > 'z') && ('0' > c || c > '9')) { return false; } char d = c; while (++index < x.length()) { c = x.charAt(index); // Subsequent characters may be letters, digits or the dash. if (('a' > c || c > 'z') && ('0' > c || c > '9') && (c != '-')) { break; } d = c; } if (index >= x.length()) { return d != '-'; } // Labels must be separated by dots, and may not end with the dash. if ('.' != c || d == '-') { return false; } ++index; } // The domain ended in a dot, legal but we do not approve. return false; } /** * Checks whether an int value is a valid UCS code-point above 0x7F as defined * in RFC 3987. */ private static boolean isUcsChar(int c) { return (c >= 0xA0 && c <= 0xD7FF) || (c >= 0xF900 && c <= 0xFDCF) || (c >= 0xFDF0 && c <= 0xFFEF) || (c >= 0x10000 && c <= 0x1FFFD) || (c >= 0x20000 && c <= 0x2FFFD) || (c >= 0x30000 && c <= 0x3FFFD) || (c >= 0x40000 && c <= 0x4FFFD) || (c >= 0x50000 && c <= 0x5FFFD) || (c >= 0x60000 && c <= 0x6FFFD) || (c >= 0x70000 && c <= 0x7FFFD) || (c >= 0x80000 && c <= 0x8FFFD) || (c >= 0x90000 && c <= 0x9FFFD) || (c >= 0xA0000 && c <= 0xAFFFD) || (c >= 0xB0000 && c <= 0xBFFFD) || (c >= 0xC0000 && c <= 0xCFFFD) || (c >= 0xD0000 && c <= 0xDFFFD) || (c >= 0xE1000 && c <= 0xEFFFD); } private WaveIdentifiers() { } }