/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.exoplatform.ecm.utils.text;

import java.io.UnsupportedEncodingException;
import java.util.BitSet;

/**
 * This class provides some text related utilities: URL-style escaping and the
 * escaping / replacement of characters that must not appear in node names.
 */
public class Text {

  /** Characters that {@link #convertJcrChars(String)} replaces with '-'. */
  private static final String SPECIAL_CHARACTERS = "&#*@\'\"|.\t\r\n$&\\><:";

  /**
   * Does an URL encoding of the <code>string</code> using the <code>escape</code> character,
   * without any extra unescaped characters. Equivalent to
   * {@link #escape(String, char, boolean, String) escape(string, escape, isPath, "")}.
   *
   * @param string
   *          the string to encode.
   * @param escape
   *          the escape character.
   * @param isPath
   *          if <code>true</code>, the string is treated as path
   * @return the escaped string
   * @throws NullPointerException
   *           if <code>string</code> is <code>null</code>.
   */
  public static String escape(String string, char escape, boolean isPath) {
    return escape(string, escape, isPath, "");
  }

  /**
   * Does an URL encoding of the <code>string</code> using the <code>escape</code> character. The
   * characters that don't need encoding are those defined 'unreserved' in section 2.3 of the 'URI
   * generic syntax' RFC 2396, but without the escape character. If <code>isPath</code> is
   * <code>true</code>, additionally the slash '/' is ignored, too.
   * <p>
   * The string is encoded byte by byte on its UTF-8 representation. Every byte that is not allowed
   * to pass through is written as the escape character followed by two hexadecimal digits taken
   * from <code>org.exoplatform.services.jcr.util.Text.hexTable</code>.
   *
   * @param string
   *          the string to encode.
   * @param escape
   *          the escape character.
   * @param isPath
   *          if <code>true</code>, the string is treated as path
   * @param extraCharacters
   *          the extra characters that will not be encoded; <code>null</code> is treated like the
   *          empty string.
   * @return the escaped string
   * @throws NullPointerException
   *           if <code>string</code> is <code>null</code>.
   */
  public static String escape(String string, char escape, boolean isPath, String extraCharacters) {
    // Build the set of byte values that are copied through unencoded once, up front:
    // URISave, plus URISaveEx for paths, plus the caller supplied extras.
    BitSet unescaped = (BitSet) org.exoplatform.services.jcr.util.Text.URISave.clone();
    if (isPath) {
      unescaped.or(org.exoplatform.services.jcr.util.Text.URISaveEx);
    }
    if (extraCharacters != null) {
      // NOTE(review): extras are matched against individual UTF-8 byte values below, so an extra
      // character above 0x7F matches by byte value rather than by character.
      for (int i = 0; i < extraCharacters.length(); i++) {
        unescaped.set(extraCharacters.charAt(i));
      }
    }

    byte[] bytes;
    try {
      bytes = string.getBytes("utf-8");
    } catch (UnsupportedEncodingException e) {
      // Same error type and message as before, but keep the original exception as the cause.
      InternalError error = new InternalError(e.toString());
      error.initCause(e);
      throw error;
    }

    StringBuilder out = new StringBuilder(bytes.length);
    for (byte b : bytes) {
      int c = b & 0xff;
      // The escape character itself is always encoded, even when it is in the allowed set.
      if (unescaped.get(c) && c != escape) {
        out.append((char) c);
      } else {
        out.append(escape);
        out.append(org.exoplatform.services.jcr.util.Text.hexTable[(c >> 4) & 0x0f]);
        out.append(org.exoplatform.services.jcr.util.Text.hexTable[c & 0x0f]);
      }
    }
    return out.toString();
  }

  /**
   * Escapes selected characters of a name as '%' followed by two upper-case hexadecimal digits.
   * The encoding is loosely modeled after URI encoding. Use
   * {@link #unescapeIllegalJcrChars(String)} for decoding.
   * <p>
   * The characters escaped by this implementation are:
   * <ul>
   * <li>'&amp;', '#', '*', ''', '"', '|', '\', '&gt;' and '&lt;';</li>
   * <li>tab, carriage return and line feed;</li>
   * <li>'.' when the whole name is shorter than three characters;</li>
   * <li>a space at the first or last position of the name.</li>
   * </ul>
   * Note that '/', ':', '[', ']' and '%', although mentioned by the grammar below, are
   * <strong>not</strong> escaped by this method.
   * <p>
   * QName EBNF, for reference:
   * <pre>{@code
   * simplename ::= onecharsimplename | twocharsimplename | threeormorecharname
   * onecharsimplename ::= (* Any Unicode character except: '.', '/', ':', '[', ']', '*', ''', '"',
   *                       '|' or any whitespace character *)
   * twocharsimplename ::= '.' onecharsimplename | onecharsimplename '.'
   *                       | onecharsimplename onecharsimplename
   * threeormorecharname ::= nonspace string nonspace
   * string ::= char | string char
   * char ::= nonspace | ' '
   * nonspace ::= (* Any Unicode character except: '/', ':', '[', ']', '*', ''', '"', '|' or any
   *              whitespace character *)
   * }</pre>
   *
   * @param name
   *          the name to escape
   * @return the escaped name, or the empty string if <code>name</code> is <code>null</code> or
   *         empty
   */
  public static String escapeIllegalJcrChars(String name) {
    if (name == null || name.length() == 0) {
      return "";
    }
    int length = name.length();
    StringBuilder buffer = new StringBuilder(length * 2);
    for (int i = 0; i < length; i++) {
      char ch = name.charAt(i);
      if (mustEscapeJcrChar(ch, i, length)) {
        buffer.append('%');
        buffer.append(Character.toUpperCase(Character.forDigit(ch / 16, 16)));
        buffer.append(Character.toUpperCase(Character.forDigit(ch % 16, 16)));
      } else {
        buffer.append(ch);
      }
    }
    return buffer.toString();
  }

  /**
   * Tells whether {@link #escapeIllegalJcrChars(String)} has to escape the given character.
   *
   * @param ch
   *          the character to test
   * @param index
   *          the position of <code>ch</code> in the name
   * @param length
   *          the length of the whole name
   * @return <code>true</code> if the character must be written in escaped form
   */
  private static boolean mustEscapeJcrChar(char ch, int index, int length) {
    switch (ch) {
      case '&':
      case '#':
      case '*':
      case '\'':
      case '"':
      case '|':
      case '\t':
      case '\r':
      case '\n':
      case '\\':
      case '>':
      case '<':
        return true;
      case '.':
        // A dot is only escaped in one- and two-character names.
        return length < 3;
      case ' ':
        // A space is only escaped when it is leading or trailing.
        return index == 0 || index == length - 1;
      default:
        return false;
    }
  }

  /**
   * Unescapes previously escaped jcr chars by delegating to
   * <code>org.exoplatform.services.jcr.util.Text.unescapeIllegalJcrChars(String)</code>. <br>
   * Please note that this does not do exactly the same as the url related unescape(String), since
   * it handles the encoding differently.
   *
   * @param name
   *          the name to unescape
   * @return the unescaped name
   */
  public static String unescapeIllegalJcrChars(String name) {
    return org.exoplatform.services.jcr.util.Text.unescapeIllegalJcrChars(name);
  }

  /**
   * Converts every character of a string that is listed in the special character set of this
   * class ('&amp;', '#', '*', '@', ''', '"', '|', '.', tab, carriage return, line feed, '$', '\',
   * '&gt;', '&lt;' and ':') to '-'. All other characters are copied unchanged.
   *
   * @param name
   *          the name to convert
   * @return the converted name, or the empty string if <code>name</code> is <code>null</code> or
   *         empty
   */
  public static String convertJcrChars(String name) {
    if (name == null || name.length() == 0) {
      return "";
    }
    StringBuilder buffer = new StringBuilder(name.length() * 2);
    for (int i = 0; i < name.length(); i++) {
      char ch = name.charAt(i);
      buffer.append(SPECIAL_CHARACTERS.indexOf(ch) != -1 ? '-' : ch);
    }
    return buffer.toString();
  }
}