/*
* Copyright (C) 2009 eXo Platform SAS.
*
* This is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1 of
* the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this software; if not, write to the Free
* Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
* 02110-1301 USA, or see the FSF site: http://www.fsf.org.
*/
package org.exoplatform.services.jcr.util;
import java.io.CharArrayWriter;
import java.io.UnsupportedEncodingException;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.ArrayList;
import java.util.BitSet;
import java.util.Properties;
/**
 * Static text utilities: message digests rendered as hex, string splitting/joining/replacing,
 * XML entity escaping, URL-style and JCR-name escaping, and simple path/qualified-name helpers.
 * The class only has static members and cannot be instantiated.
 */
public class Text
{
   /**
    * Hidden constructor.
    */
   private Text()
   {
   }

   /**
    * Lower-case hexadecimal digits, indexed by nibble value. Used by the digest and escape
    * methods of this class. The array is shared; callers must not modify it.
    */
   public static final char[] hexTable = "0123456789abcdef".toCharArray();

   /**
    * Calculates an MD5 hash of the given string.
    *
    * @param data
    *          the data to encode
    * @param enc
    *          the character encoding used to convert <code>data</code> to bytes
    * @return a lower-case hex encoded string of the md5 digested input
    * @throws UnsupportedEncodingException
    *           if <code>enc</code> is not a supported encoding
    */
   public static String md5(String data, String enc) throws UnsupportedEncodingException
   {
      try
      {
         return digest("MD5", data.getBytes(enc));
      }
      catch (NoSuchAlgorithmException e)
      {
         throw internalError("MD5 digest not available???", e);
      }
   }

   /**
    * Calculates an MD5 hash of the given string using 'utf-8' encoding.
    *
    * @param data
    *          the data to encode
    * @return a lower-case hex encoded string of the md5 digested input
    */
   public static String md5(String data)
   {
      try
      {
         return md5(data, "utf-8");
      }
      catch (UnsupportedEncodingException e)
      {
         throw internalError("UTF-8 encoding not available???", e);
      }
   }

   /**
    * Digests the plain string using the given algorithm.
    *
    * @param algorithm
    *          The algorithm for the digest. This algorithm must be supported by the MessageDigest
    *          class.
    * @param data
    *          The plain text String to be digested.
    * @param enc
    *          The character encoding to use
    * @return The digested plain text String represented as lower-case hex digits.
    * @throws java.security.NoSuchAlgorithmException
    *           if the desired algorithm is not supported by the MessageDigest class.
    * @throws java.io.UnsupportedEncodingException
    *           if the encoding is not supported
    */
   public static String digest(String algorithm, String data, String enc) throws NoSuchAlgorithmException,
      UnsupportedEncodingException
   {
      return digest(algorithm, data.getBytes(enc));
   }

   /**
    * Digests the given bytes using the given algorithm.
    *
    * @param algorithm
    *          The algorithm for the digest. This algorithm must be supported by the MessageDigest
    *          class.
    * @param data
    *          the data to digest with the given algorithm
    * @return The digest represented as lower-case hex digits (two per byte).
    * @throws java.security.NoSuchAlgorithmException
    *           if the desired algorithm is not supported by the MessageDigest class.
    */
   public static String digest(String algorithm, byte[] data) throws NoSuchAlgorithmException
   {
      byte[] hash = MessageDigest.getInstance(algorithm).digest(data);
      StringBuilder hex = new StringBuilder(hash.length * 2);
      for (int i = 0; i < hash.length; i++)
      {
         int octet = hash[i] & 0xff;
         hex.append(hexTable[octet >> 4]);
         hex.append(hexTable[octet & 0x0f]);
      }
      return hex.toString();
   }

   /**
    * Returns an array of strings decomposed of the original string, split at every occurrence of
    * 'ch'. If 2 'ch' follow each other with no intermediate characters, empty "" entries are
    * avoided.
    *
    * @param str
    *          the string to decompose
    * @param ch
    *          the character (Unicode code point) to use as split pattern
    * @return an array of strings; empty if <code>str</code> is <code>null</code> or empty
    */
   public static String[] explode(String str, int ch)
   {
      return explode(str, ch, false);
   }

   /**
    * Returns an array of strings decomposed of the original string, split at every occurrence of
    * 'ch'.
    *
    * @param str
    *          the string to decompose
    * @param ch
    *          the character (Unicode code point) to use as split pattern
    * @param respectEmpty
    *          if <code>true</code>, empty elements are generated
    * @return an array of strings; empty if <code>str</code> is <code>null</code> or empty
    */
   public static String[] explode(String str, int ch, boolean respectEmpty)
   {
      if (str == null || str.length() == 0)
      {
         return new String[0];
      }
      // String.indexOf(int) matches supplementary code points as a surrogate pair, so the
      // delimiter may occupy two chars; skip all of them after each match.
      int delimLength = Character.charCount(ch);
      ArrayList<String> parts = new ArrayList<String>();
      int start = 0;
      int pos;
      while ((pos = str.indexOf(ch, start)) >= 0)
      {
         if (pos > start || respectEmpty)
         {
            parts.add(str.substring(start, pos));
         }
         start = pos + delimLength;
      }
      // remainder after the last delimiter
      if (start < str.length())
      {
         parts.add(str.substring(start));
      }
      else if (respectEmpty && start == str.length())
      {
         parts.add("");
      }
      return parts.toArray(new String[parts.size()]);
   }

   /**
    * Concatenates all strings in the string array using the specified delimiter.
    *
    * @param arr
    *          the strings to join
    * @param delim
    *          the delimiter inserted between two consecutive elements
    * @return the concatenated string
    */
   public static String implode(String[] arr, String delim)
   {
      StringBuilder buf = new StringBuilder();
      for (int i = 0; i < arr.length; i++)
      {
         if (i > 0)
         {
            buf.append(delim);
         }
         buf.append(arr[i]);
      }
      return buf.toString();
   }

   /**
    * Replaces all occurrences of <code>oldString</code> in <code>text</code> with
    * <code>newString</code>. If <code>oldString</code> is empty there is nothing to search for
    * and <code>text</code> is returned unchanged.
    *
    * @param text
    *          the text to search in
    * @param oldString
    *          old substring to be replaced with <code>newString</code>
    * @param newString
    *          new substring to replace occurrences of <code>oldString</code>
    * @return the text with all replacements applied
    * @throws IllegalArgumentException
    *           if any argument is <code>null</code>
    */
   public static String replace(String text, String oldString, String newString)
   {
      if (text == null || oldString == null || newString == null)
      {
         throw new IllegalArgumentException("null argument");
      }
      // An empty pattern matches at every position without ever advancing; treat it as
      // "no occurrence" instead of looping forever.
      if (oldString.length() == 0)
      {
         return text;
      }
      int pos = text.indexOf(oldString);
      if (pos == -1)
      {
         return text;
      }
      int lastPos = 0;
      StringBuilder sb = new StringBuilder(text.length());
      while (pos != -1)
      {
         sb.append(text, lastPos, pos);
         sb.append(newString);
         lastPos = pos + oldString.length();
         pos = text.indexOf(oldString, lastPos);
      }
      sb.append(text, lastPos, text.length());
      return sb.toString();
   }

   /**
    * Replaces illegal XML characters in the given string by their corresponding predefined entity
    * references: <code>&lt;</code>, <code>&gt;</code>, <code>&amp;</code>, <code>&quot;</code>
    * and <code>'</code> become <code>&amp;lt;</code>, <code>&amp;gt;</code>,
    * <code>&amp;amp;</code>, <code>&amp;quot;</code> and <code>&amp;apos;</code>.
    *
    * @param text
    *          text to be escaped
    * @return the escaped string; the same instance if nothing had to be escaped
    * @throws IllegalArgumentException
    *           if <code>text</code> is <code>null</code>
    */
   public static String encodeIllegalXMLCharacters(String text)
   {
      if (text == null)
      {
         throw new IllegalArgumentException("null argument");
      }
      StringBuilder buf = null; // created lazily, only when something needs escaping
      int length = text.length();
      int pos = 0; // start of the not yet copied, unescaped run
      for (int i = 0; i < length; i++)
      {
         String entity;
         switch (text.charAt(i))
         {
            case '<' :
               entity = "&lt;";
               break;
            case '>' :
               entity = "&gt;";
               break;
            case '&' :
               entity = "&amp;";
               break;
            case '"' :
               entity = "&quot;";
               break;
            case '\'' :
               entity = "&apos;";
               break;
            default :
               continue;
         }
         if (buf == null)
         {
            buf = new StringBuilder(length + 16);
         }
         buf.append(text, pos, i).append(entity);
         pos = i + 1;
      }
      if (buf == null)
      {
         return text;
      }
      return buf.append(text, pos, length).toString();
   }

   /**
    * The list of characters that are not encoded by the <code>escape()</code> and
    * <code>unescape()</code> methods. It contains the characters defined as 'unreserved' in
    * section 2.3 of the RFC 2396 'URI generic syntax': <br>
    *
    * <pre>
    * unreserved = alphanum | mark
    * mark = "-" | "_" | "." | "!" | "~" | "*" |
    *        "'" | "(" | ")"
    * </pre>
    */
   public static BitSet URISave;

   /**
    * Same as {@link #URISave} but also contains the '/'
    */
   public static BitSet URISaveEx;

   static
   {
      URISave = new BitSet(256);
      int i;
      for (i = 'a'; i <= 'z'; i++)
      {
         URISave.set(i);
      }
      for (i = 'A'; i <= 'Z'; i++)
      {
         URISave.set(i);
      }
      for (i = '0'; i <= '9'; i++)
      {
         URISave.set(i);
      }
      URISave.set('-');
      URISave.set('_');
      URISave.set('.');
      URISave.set('!');
      URISave.set('~');
      URISave.set('*');
      URISave.set('\'');
      URISave.set('(');
      URISave.set(')');
      URISaveEx = (BitSet)URISave.clone();
      URISaveEx.set('/');
   }

   /**
    * Does an URL encoding of the <code>string</code> using the <code>escape</code> character. The
    * characters that don't need encoding are those defined 'unreserved' in section 2.3 of the 'URI
    * generic syntax' RFC 2396, but without the escape character.
    *
    * @param string
    *          the string to encode.
    * @param escape
    *          the escape character.
    * @return the escaped string
    * @throws NullPointerException
    *           if <code>string</code> is <code>null</code>.
    */
   public static String escape(String string, char escape)
   {
      return escape(string, escape, false);
   }

   /**
    * Does an URL encoding of the <code>string</code> using the <code>escape</code> character. The
    * string is converted to UTF-8 and every byte that is not 'unreserved' (section 2.3 of the 'URI
    * generic syntax' RFC 2396), or that equals the escape character, is written as the escape
    * character followed by two lower-case hex digits. If <code>isPath</code> is
    * <code>true</code>, additionally the slash '/' is left unescaped.
    *
    * @param string
    *          the string to encode.
    * @param escape
    *          the escape character.
    * @param isPath
    *          if <code>true</code>, the string is treated as path
    * @return the escaped string
    * @throws NullPointerException
    *           if <code>string</code> is <code>null</code>.
    */
   public static String escape(String string, char escape, boolean isPath)
   {
      try
      {
         BitSet validChars = isPath ? URISaveEx : URISave;
         byte[] bytes = string.getBytes("utf-8");
         StringBuilder out = new StringBuilder(bytes.length);
         for (int i = 0; i < bytes.length; i++)
         {
            int c = bytes[i] & 0xff;
            if (validChars.get(c) && c != escape)
            {
               out.append((char)c);
            }
            else
            {
               out.append(escape);
               out.append(hexTable[(c >> 4) & 0x0f]);
               out.append(hexTable[c & 0x0f]);
            }
         }
         return out.toString();
      }
      catch (UnsupportedEncodingException e)
      {
         throw internalError(e.toString(), e);
      }
   }

   /**
    * Does a URL encoding of the <code>string</code>. The characters that don't need encoding are
    * those defined 'unreserved' in section 2.3 of the 'URI generic syntax' RFC 2396.
    *
    * @param string
    *          the string to encode
    * @return the escaped string
    * @throws NullPointerException
    *           if <code>string</code> is <code>null</code>.
    */
   public static String escape(String string)
   {
      return escape(string, '%');
   }

   /**
    * Does a URL encoding of the <code>path</code>. The characters that don't need encoding are those
    * defined 'unreserved' in section 2.3 of the 'URI generic syntax' RFC 2396. In contrast to the
    * {@link #escape(String)} method, not the entire path string is escaped, but every individual
    * part (i.e. the slashes are not escaped).
    *
    * @param path
    *          the path to encode
    * @return the escaped path
    * @throws NullPointerException
    *           if <code>path</code> is <code>null</code>.
    */
   public static String escapePath(String path)
   {
      return escape(path, '%', true);
   }

   /**
    * Does a URL decoding of the <code>string</code> using the <code>escape</code> character. It is
    * the inverse of {@link #escape(String, char)}: each run of consecutive escape sequences is
    * decoded as UTF-8, all other characters are copied as they are. Please note that in opposite
    * to the {@link java.net.URLDecoder} it does not transform the + into spaces.
    *
    * @param string
    *          the string to decode
    * @param escape
    *          the escape character
    * @return the decoded string
    * @throws NullPointerException
    *           if <code>string</code> is <code>null</code>.
    * @throws StringIndexOutOfBoundsException
    *           if not enough characters follow an escape character
    * @throws IllegalArgumentException
    *           if the 2 characters following the escape character do not represent a hex-number.
    */
   public static String unescape(String string, char escape)
   {
      int length = string.length();
      StringBuilder out = new StringBuilder(length);
      // Every escape sequence consumes three chars, so there are at most length / 3 octets.
      byte[] octets = new byte[length / 3];
      int pending = 0; // number of escaped octets collected but not yet decoded
      int i = 0;
      while (i < length)
      {
         char c = string.charAt(i);
         if (c == escape)
         {
            if (i + 2 >= length)
            {
               throw new StringIndexOutOfBoundsException("Incomplete escape sequence at index " + i);
            }
            int high = Character.digit(string.charAt(i + 1), 16);
            int low = Character.digit(string.charAt(i + 2), 16);
            if (high < 0 || low < 0)
            {
               throw new IllegalArgumentException("Invalid escape sequence \"" + string.substring(i, i + 3)
                  + "\" at index " + i);
            }
            octets[pending++] = (byte)((high << 4) | low);
            i += 3;
         }
         else
         {
            // A literal char ends the current run of octets; decode the run as a whole so that
            // multi-byte UTF-8 sequences are reassembled.
            if (pending > 0)
            {
               out.append(decodeUtf8(octets, pending));
               pending = 0;
            }
            out.append(c);
            i++;
         }
      }
      if (pending > 0)
      {
         out.append(decodeUtf8(octets, pending));
      }
      return out.toString();
   }

   /**
    * Does a URL decoding of the <code>string</code>. Please note that in opposite to the
    * {@link java.net.URLDecoder} it does not transform the + into spaces.
    *
    * @param string
    *          the string to decode
    * @return the decoded string
    * @throws NullPointerException
    *           if <code>string</code> is <code>null</code>.
    * @throws StringIndexOutOfBoundsException
    *           if not enough characters follow an escape character
    * @throws IllegalArgumentException
    *           if the 2 characters following the escape character do not represent a hex-number.
    */
   public static String unescape(String string)
   {
      return unescape(string, '%');
   }

   /**
    * Escapes all illegal JCR name characters of a string. The encoding is loosely modeled after URI
    * encoding, but only encodes the characters it absolutely needs to in order to make the resulting
    * string a valid JCR name. Use {@link #unescapeIllegalJcrChars(String)} for decoding.
    * <p>
    * The characters written as '%' followed by two upper-case hex digits are: '%', '/', ':', '[',
    * ']', '*', ''', '"', '|', tab, carriage return, line feed, a space at the first or last
    * position, and '.' if the name is shorter than three characters.
    * <p>
    * QName EBNF:
    *
    * <pre>
    * simplename ::= onecharsimplename |
    *                twocharsimplename |
    *                threeormorecharname
    * onecharsimplename ::= (* Any Unicode character except:
    *                       '.', '/', ':', '[', ']', '*',
    *                       ''', '"', '|' or any whitespace
    *                       character *)
    * twocharsimplename ::= '.' onecharsimplename |
    *                       onecharsimplename '.' |
    *                       onecharsimplename onecharsimplename
    * threeormorecharname ::= nonspace string nonspace
    * string ::= char | string char
    * char ::= nonspace | ' '
    * nonspace ::= (* Any Unicode character except:
    *              '/', ':', '[', ']', '*',
    *              ''', '"', '|' or any whitespace
    *              character *)
    * </pre>
    *
    * @param name
    *          the name to escape
    * @return the escaped name
    */
   public static String escapeIllegalJcrChars(String name)
   {
      int length = name.length();
      StringBuilder buffer = new StringBuilder(length * 2);
      for (int i = 0; i < length; i++)
      {
         char ch = name.charAt(i);
         if (ch == '%' || ch == '/' || ch == ':' || ch == '[' || ch == ']' || ch == '*' || ch == '\'' || ch == '"'
            || ch == '|' || (ch == '.' && length < 3) || (ch == ' ' && (i == 0 || i == length - 1))
            || ch == '\t' || ch == '\r' || ch == '\n')
         {
            buffer.append('%');
            buffer.append(Character.toUpperCase(Character.forDigit(ch / 16, 16)));
            buffer.append(Character.toUpperCase(Character.forDigit(ch % 16, 16)));
         }
         else
         {
            buffer.append(ch);
         }
      }
      return buffer.toString();
   }

   /**
    * Unescapes previously escaped jcr chars. A '%' that is not followed by two hex digits is
    * copied unchanged. <br> Please note, that this does not exactly the same as the url related
    * {@link #unescape(String)}, since it handles the byte-encoding differently: every escape
    * sequence yields exactly one character.
    *
    * @param name
    *          the name to unescape
    * @return the unescaped name
    */
   public static String unescapeIllegalJcrChars(String name)
   {
      int length = name.length();
      StringBuilder buffer = new StringBuilder(length);
      int start = 0; // start of the not yet copied part of name
      int i = name.indexOf('%');
      // An escape sequence needs two characters after the '%'.
      while (i > -1 && i + 2 < length)
      {
         buffer.append(name, start, i);
         int a = Character.digit(name.charAt(i + 1), 16);
         int b = Character.digit(name.charAt(i + 2), 16);
         if (a > -1 && b > -1)
         {
            buffer.append((char)(a * 16 + b));
            start = i + 3;
         }
         else
         {
            // not an escape sequence: keep the '%' and rescan right after it
            buffer.append('%');
            start = i + 1;
         }
         i = name.indexOf('%', start);
      }
      buffer.append(name, start, length);
      return buffer.toString();
   }

   /**
    * Returns the name part of the path, i.e. everything after the last '/'.
    *
    * @param path
    *          the path
    * @return the name part, or the empty string if the path contains no '/'
    */
   public static String getName(String path)
   {
      int pos = path.lastIndexOf('/');
      return pos >= 0 ? path.substring(pos + 1) : "";
   }

   /**
    * Returns the name part of the path, delimited by the given <code>delim</code>
    *
    * @param path
    *          the path
    * @param delim
    *          the delimiter
    * @return the name part, or the empty string if the path does not contain the delimiter
    */
   public static String getName(String path, char delim)
   {
      int pos = path.lastIndexOf(delim);
      return pos >= 0 ? path.substring(pos + 1) : "";
   }

   /**
    * Same as {@link #getName(String)} but adding the possibility to pass paths that end with a
    * trailing '/'
    *
    * @param path
    *          the path
    * @param ignoreTrailingSlash
    *          if <code>true</code>, a single trailing '/' is removed before the name is determined
    *          (unless the path consists of that '/' only)
    * @return the name part
    * @see #getName(String)
    */
   public static String getName(String path, boolean ignoreTrailingSlash)
   {
      if (ignoreTrailingSlash && path.endsWith("/") && path.length() > 1)
      {
         path = path.substring(0, path.length() - 1);
      }
      return getName(path);
   }

   /**
    * Returns the namespace prefix of the given <code>qname</code>. If the prefix is missing, an
    * empty string is returned. Please note, that this method does not validate the name or prefix.
    * <br> the qname has the format: qname := [prefix ':'] local;
    *
    * @param qname
    *          a qualified name
    * @return the prefix of the name or "".
    * @see #getLocalName(String)
    * @throws NullPointerException
    *           if <code>qname</code> is <code>null</code>
    */
   public static String getNamespacePrefix(String qname)
   {
      int pos = qname.indexOf(':');
      return pos >= 0 ? qname.substring(0, pos) : "";
   }

   /**
    * Returns the local name of the given <code>qname</code>. Please note, that this method does not
    * validate the name. <br> the qname has the format: qname := [prefix ':'] local;
    *
    * @param qname
    *          a qualified name
    * @return the localname
    * @see #getNamespacePrefix(String)
    * @throws NullPointerException
    *           if <code>qname</code> is <code>null</code>
    */
   public static String getLocalName(String qname)
   {
      int pos = qname.indexOf(':');
      return pos >= 0 ? qname.substring(pos + 1) : qname;
   }

   /**
    * Determines, if two paths denote hierarchical siblings, i.e. both contain a '/' and are
    * identical up to their last '/'.
    *
    * @param p1
    *          first path
    * @param p2
    *          second path
    * @return true if on same level, false otherwise
    */
   public static boolean isSibling(String p1, String p2)
   {
      int pos1 = p1.lastIndexOf('/');
      int pos2 = p2.lastIndexOf('/');
      return (pos1 == pos2 && pos1 >= 0 && p1.regionMatches(0, p2, 0, pos1));
   }

   /**
    * Determines if the <code>descendant</code> path is hierarchical a descendant of
    * <code>path</code>. A <code>path</code> with a trailing '/' (including the root path "/") is
    * handled the same way as in {@link #isDescendantOrEqual(String, String)}.
    *
    * @param path
    *          the current path
    * @param descendant
    *          the potential descendant
    * @return <code>true</code> if the <code>descendant</code> is a descendant; <code>false</code>
    *         otherwise.
    */
   public static boolean isDescendant(String path, String descendant)
   {
      // Compare against the path including its separator so that "/ab" is not taken for a
      // descendant of "/a" and children of "/" are recognised.
      String pattern = path.endsWith("/") ? path : path + "/";
      return !path.equals(descendant) && descendant.startsWith(pattern);
   }

   /**
    * Determines if the <code>descendant</code> path is hierarchical a descendant of
    * <code>path</code> or equal to it.
    *
    * @param path
    *          the path to check
    * @param descendant
    *          the potential descendant
    * @return <code>true</code> if the <code>descendant</code> is a descendant or equal;
    *         <code>false</code> otherwise.
    */
   public static boolean isDescendantOrEqual(String path, String descendant)
   {
      if (path.equals(descendant))
      {
         return true;
      }
      String pattern = path.endsWith("/") ? path : path + "/";
      return descendant.startsWith(pattern);
   }

   /**
    * Returns the n<sup>th</sup> relative parent of the path, where n=level.
    * <p>
    * Example:<br>
    * <code>
    * Text.getRelativeParent("/foo/bar/test", 1) == "/foo/bar"
    * </code>
    *
    * @param path
    *          the path of the page
    * @param level
    *          the level of the parent
    * @return the relative parent; "/" if the parent is the root, or the empty string if the path
    *         has fewer than <code>level</code> '/' separators
    */
   public static String getRelativeParent(String path, int level)
   {
      int idx = path.length();
      while (level > 0)
      {
         // step back to the previous separator, one per level
         idx = path.lastIndexOf('/', idx - 1);
         if (idx < 0)
         {
            return "";
         }
         level--;
      }
      return (idx == 0) ? "/" : path.substring(0, idx);
   }

   /**
    * Same as {@link #getRelativeParent(String, int)} but adding the possibility to pass paths that
    * end with a trailing '/'.
    *
    * @see #getRelativeParent(String, int)
    * @param path
    *          path
    * @param level
    *          level
    * @param ignoreTrailingSlash
    *          if <code>true</code>, a single trailing '/' is removed first (unless the path
    *          consists of that '/' only)
    * @return String relative parent
    */
   public static String getRelativeParent(String path, int level, boolean ignoreTrailingSlash)
   {
      if (ignoreTrailingSlash && path.endsWith("/") && path.length() > 1)
      {
         path = path.substring(0, path.length() - 1);
      }
      return getRelativeParent(path, level);
   }

   /**
    * Returns the n<sup>th</sup> absolute parent of the path, where n=level.
    * <p>
    * Example:<br>
    * <code>
    * Text.getAbsoluteParent("/foo/bar/test", 1) == "/foo/bar"
    * </code>
    *
    * @param path
    *          the path of the page
    * @param level
    *          the level of the parent
    * @return the absolute parent, or the empty string if the path has fewer than
    *         <code>level + 1</code> segments
    */
   public static String getAbsoluteParent(String path, int level)
   {
      int idx = 0;
      int len = path.length();
      while (level >= 0 && idx < len)
      {
         // advance to the end of the next segment
         idx = path.indexOf('/', idx + 1);
         if (idx < 0)
         {
            idx = len;
         }
         level--;
      }
      return level >= 0 ? "" : path.substring(0, idx);
   }

   /**
    * Performs variable replacement on the given string value. Each <code>${...}</code> sequence
    * within the given value is replaced with the value of the named parser variable. If a variable
    * is not found in the properties an IllegalArgumentException is thrown unless
    * <code>ignoreMissing</code> is <code>true</code>. In the latter case, the missing variable is
    * replaced by the empty string. A <code>${</code> without a closing <code>}</code> is copied
    * unchanged together with the rest of the value.
    *
    * @param variables
    *          variables
    * @param value
    *          the original value
    * @param ignoreMissing
    *          if <code>true</code>, missing variables are replaced by the empty string.
    * @return value after variable replacements
    * @throws IllegalArgumentException
    *           if the replacement of a referenced variable is not found
    */
   public static String replaceVariables(Properties variables, String value, boolean ignoreMissing)
      throws IllegalArgumentException
   {
      StringBuilder result = new StringBuilder();
      // Value:
      // +--+-+--------+-+-----------------+
      // |  |p|-->     |q|-->              |
      // +--+-+--------+-+-----------------+
      int p = 0; // start of the not yet copied text
      int q = value.indexOf("${"); // Find first ${
      while (q != -1)
      {
         result.append(value, p, q); // Text before ${
         p = q;
         q = value.indexOf("}", q + 2); // Find }
         if (q != -1)
         {
            String variable = value.substring(p + 2, q);
            String replacement = variables.getProperty(variable);
            if (replacement == null)
            {
               if (ignoreMissing)
               {
                  replacement = "";
               }
               else
               {
                  throw new IllegalArgumentException("Replacement not found for ${" + variable + "}.");
               }
            }
            result.append(replacement);
            p = q + 1;
            q = value.indexOf("${", p); // Find next ${
         }
      }
      result.append(value, p, value.length()); // Trailing text
      return result.toString();
   }

   /**
    * Decodes the first <code>count</code> bytes of <code>octets</code> as UTF-8.
    */
   private static String decodeUtf8(byte[] octets, int count)
   {
      try
      {
         return new String(octets, 0, count, "utf-8");
      }
      catch (UnsupportedEncodingException e)
      {
         throw internalError(e.toString(), e);
      }
   }

   /**
    * Creates an InternalError with the given message that keeps <code>cause</code> as its cause.
    */
   private static InternalError internalError(String message, Throwable cause)
   {
      InternalError error = new InternalError(message);
      error.initCause(cause);
      return error;
   }
}