/*
 * Copyright (C) 2009 eXo Platform SAS.
 *
 * This is free software; you can redistribute it and/or modify it
 * under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation; either version 2.1 of
 * the License, or (at your option) any later version.
 *
 * This software is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this software; if not, write to the Free
 * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 * 02110-1301 USA, or see the FSF site: http://www.fsf.org.
 */
package org.exoplatform.services.jcr.impl.util;

import org.exoplatform.services.jcr.datamodel.InternalQName;
import org.exoplatform.services.jcr.impl.xml.XMLChar;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Implements the encode and decode routines as specified for XML name to SQL identifier conversion
 * in ISO 9075-14:2003.<br>
 * If a character <code>c</code> is not valid at a certain position in an XML 1.0 NCName it is
 * encoded in the form: '_x' + hexValueOf(c) + '_' <br>
 * Note that only the local part of a {@link javax.xml.namespace.QName} is encoded / decoded. A URI
 * namespace will always be valid and does not need encoding.
 */
public class ISO9075
{

   /** Hidden constructor: this class only exposes static helpers. */
   private ISO9075()
   {
   }

   /** Pattern matching one encoded character: "_x", four hex digits, "_". */
   private static final Pattern ENCODE_PATTERN = Pattern.compile("_x\\p{XDigit}{4}_");

   /** Zero padding used to left-pad a hex value to four digits (at most three zeros are needed). */
   private static final char[] PADDING = new char[]{'0', '0', '0'};

   /** All the possible hex digits, lower and upper case. */
   private static final String HEX_DIGITS = "0123456789abcdefABCDEF";

   /**
    * Encodes the local part of <code>name</code> as specified in ISO 9075.
    *
    * @param name
    *          the <code>QName</code> to encode.
    * @return the encoded <code>QName</code> or <code>name</code> itself if it does not need
    *         encoding.
    */
   public static InternalQName encode(InternalQName name)
   {
      String local = name.getName();
      String encoded = encode(local);
      if (encoded.equals(local))
      {
         return name;
      }
      return new InternalQName(name.getNamespace(), encoded);
   }

   /**
    * Encodes <code>name</code> as specified in ISO 9075.
    *
    * @param name
    *          the <code>String</code> to encode.
    * @return the encoded <code>String</code> or <code>name</code> itself if it does not need
    *         encoding.
    */
   public static String encode(String name)
   {
      // quick check for root node name
      if (name.length() == 0)
      {
         return name;
      }
      if (XMLChar.isValidName(name) && name.indexOf("_x") < 0)
      {
         // already valid and contains nothing that could be mistaken for an escape sequence
         return name;
      }

      StringBuilder encoded = new StringBuilder();
      for (int i = 0; i < name.length(); i++)
      {
         char c = name.charAt(i);
         // the first character must be a valid name start, all others a valid name character
         boolean validHere = (i == 0) ? XMLChar.isNameStart(c) : XMLChar.isName(c);
         if (!validHere)
         {
            encode(c, encoded);
         }
         else if (needsEscaping(name, i))
         {
            // a literal '_x' + hex digits must be encoded so that decode() does not
            // mistake it for an escape sequence
            encode('_', encoded);
         }
         else
         {
            encoded.append(c);
         }
      }
      return encoded.toString();
   }

   /**
    * Decodes the local part of <code>name</code>.
    *
    * @param name
    *          the <code>QName</code> to decode.
    * @return the decoded <code>QName</code> or <code>name</code> itself if nothing was decoded.
    */
   public static InternalQName decode(InternalQName name)
   {
      String local = name.getName();
      String decoded = decode(local);
      if (decoded.equals(local))
      {
         return name;
      }
      return new InternalQName(name.getNamespace(), decoded);
   }

   /**
    * Decodes the <code>name</code>: every sequence of the form <code>_xHHHH_</code> (H being a hex
    * digit) is replaced by the character with that code.
    *
    * @param name
    *          the <code>String</code> to decode.
    * @return the decoded <code>String</code>.
    */
   public static String decode(String name)
   {
      // quick check
      if (name.indexOf("_x") < 0)
      {
         // not encoded
         return name;
      }
      // StringBuffer (not StringBuilder) because Matcher.appendReplacement only accepts
      // a StringBuffer on older Java versions
      StringBuffer decoded = new StringBuffer();
      Matcher m = ENCODE_PATTERN.matcher(name);
      while (m.find())
      {
         char c = (char)Integer.parseInt(m.group().substring(2, 6), 16);
         // appendReplacement() interprets '$' and '\' in the replacement string as group
         // references / escapes; quote the decoded character so that e.g. "_x0024_" ('$')
         // or "_x005c_" ('\') is inserted literally instead of raising an exception
         m.appendReplacement(decoded, Matcher.quoteReplacement(Character.toString(c)));
      }
      m.appendTail(decoded);
      return decoded.toString();
   }

   // -------------------------< internal >-------------------------------------

   /**
    * Encodes the character <code>c</code> as a String in the following form:
    * <code>"_x" + hex value of c + "_"</code>, where the hex value always has four digits, with
    * leading zeros if necessary. <br>
    * Example: ' ' (the space character) is encoded to: _x0020_
    *
    * @param c
    *          the character to encode
    * @param b
    *          the encoded character is appended to <code>StringBuilder</code> <code>b</code>.
    */
   private static void encode(char c, StringBuilder b)
   {
      b.append("_x");
      String hex = Integer.toHexString(c);
      // a char yields one to four hex digits, so zero to three padding zeros are required
      b.append(PADDING, 0, 4 - hex.length());
      b.append(hex);
      b.append("_");
   }

   /**
    * Returns true if <code>name.charAt(location)</code> is the underscore character and it is
    * followed by 'x' and four hex digits. Note that the trailing underscore of a complete escape
    * sequence is not checked, so '_xHHHH' is reported as needing escaping even when it is not
    * followed by '_'.
    *
    * @param name
    *          the name to check.
    * @param location
    *          the location to look at.
    * @return <code>true</code> if the underscore at <code>location</code> must be encoded.
    * @throws IndexOutOfBoundsException
    *           if <code>location</code> is negative or not less than <code>name.length()</code>
    */
   private static boolean needsEscaping(String name, int location)
   {
      if (name.charAt(location) != '_' || name.length() < location + 6)
      {
         return false;
      }
      return name.charAt(location + 1) == 'x'
         && HEX_DIGITS.indexOf(name.charAt(location + 2)) != -1
         && HEX_DIGITS.indexOf(name.charAt(location + 3)) != -1
         && HEX_DIGITS.indexOf(name.charAt(location + 4)) != -1
         && HEX_DIGITS.indexOf(name.charAt(location + 5)) != -1;
   }
}