/*
ESXX - The friendly ECMAscript/XML Application Server
Copyright (C) 2007-2015 Martin Blom <martin@blom.org>
This program is free software: you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation, either version 3
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package org.esxx.util;
import java.net.URISyntaxException;
import java.net.URLDecoder;
import java.net.URLEncoder;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.esxx.ESXXException;
import org.mozilla.javascript.Context;
import org.mozilla.javascript.Scriptable;
import org.mozilla.javascript.ScriptableObject;
/**
 * Static string helpers: parameter substitution, HTML form encoding and
 * decoding, XML name sanitizing, ECMAScript-style URI encoding and
 * decoding, and conversion of values to lexicographically sortable strings.
 *
 * The class only has static members; it is declared abstract so it cannot
 * be instantiated directly.
 */
public abstract class StringUtil {
  /** Callback used by {@link #format} to look up the value of a parameter. */
  public interface ParamResolver {
    /**
     * Resolves one parameter.
     *
     * @param param The parameter name, i.e. the text between '{' and '}'.
     * @return The replacement text. {@link #format} hands the value to
     *         {@link Matcher#quoteReplacement}, which does not accept null.
     */
    public String resolveParam(String param);
  }

  /**
   * Replaces every <code>{name}</code> occurrence in a format string with
   * the value supplied by a resolver. Text inside '...', `...` or "..."
   * quotes is copied verbatim, so braces inside quoted strings are not
   * treated as parameters.
   *
   * @param format   The format string; may be null.
   * @param resolver Called once per parameter with the text between the
   *                 braces. The returned value is inserted literally.
   * @return The expanded string, or null if <code>format</code> is null.
   */
  public static String format(String format, ParamResolver resolver) {
    if (format == null) {
      return null;
    }

    // Matcher.appendReplacement()/appendTail() are used with a StringBuffer here
    StringBuffer s = new StringBuffer();
    Matcher m = PARAM_PATTERN.matcher(format);

    while (m.find()) {
      String g = m.group();
      String replacement;

      if (m.start(1) != -1) {
        // Match on group 1, which is our parameter pattern; strip the
        // surrounding braces and let the resolver supply the value
        replacement = resolver.resolveParam(g.substring(1, g.length() - 1));
      }
      else {
        // Match on quoted strings, which we just copy as-is
        replacement = g;
      }

      // quoteReplacement() keeps '$' and '\' in the value from being
      // interpreted as group references/escapes
      m.appendReplacement(s, Matcher.quoteReplacement(replacement));
    }

    m.appendTail(s);
    return s.toString();
  }

  // TODO: Consider replacing all this with just \{ escape notation instead
  private static final String QUOTE_PATTERN_1 = "('((\\\\')|[^'])+')";
  private static final String QUOTE_PATTERN_2 = "(`((\\\\`)|[^`])+`)";
  private static final String QUOTE_PATTERN_3 = "(\"((\\\\\")|[^\"])+\")";

  private static final Pattern PARAM_PATTERN =
    Pattern.compile("(\\{[^\\}]+\\})" + // Group 1: Matches {identifier}
                    "|" + QUOTE_PATTERN_1 + "|" + QUOTE_PATTERN_2 + "|" + QUOTE_PATTERN_3);

  /**
   * Encodes the properties of a JavaScript object as an
   * <code>name=value&amp;name=value</code> string.
   *
   * String property names and all values are URL-encoded; integer
   * (array index) property names are written as plain decimal numbers.
   * Values are converted to strings with {@link Context#toString(Object)}.
   *
   * @param cs     The name of the character encoding passed to
   *               {@link URLEncoder#encode(String, String)}.
   * @param values The object whose own property ids (as returned by
   *               <code>getIds()</code>) should be encoded.
   * @return The encoded string; empty if the object has no ids.
   * @throws java.io.UnsupportedEncodingException If <code>cs</code> is not
   *         a supported encoding.
   */
  public static String encodeFormVariables(String cs, Scriptable values)
    throws java.io.UnsupportedEncodingException {
    StringBuilder sb = new StringBuilder();

    for (Object o : values.getIds()) {
      if (sb.length() != 0) {
        sb.append("&");
      }

      if (o instanceof String) {
        String key = (String) o;
        String value = Context.toString(values.get(key, values));

        sb.append(URLEncoder.encode(key, cs));
        sb.append("=");
        sb.append(URLEncoder.encode(value, cs));
      }
      else {
        // Ids that are not strings are treated as integer indices
        int key = (Integer) o;
        String value = Context.toString(values.get(key, values));

        sb.append(key).append("=");
        sb.append(URLEncoder.encode(value, cs));
      }
    }

    return sb.toString();
  }

  /**
   * Decodes a <code>name=value&amp;name=value</code> string (always using
   * UTF-8) and stores each pair as a property on <code>result</code>.
   *
   * Names are URL-decoded, trimmed and converted to valid XML names with
   * {@link #makeXMLName} (invalid characters are removed). A name without
   * an '=' gets the empty string as value. Empty segments, as in
   * <code>"a=1&amp;&amp;b=2"</code> or <code>"&amp;a=1"</code>, carry
   * neither name nor value and are ignored.
   *
   * @param value  The encoded string; null or empty means nothing to decode.
   * @param result The object that receives the decoded properties.
   * @throws java.io.UnsupportedEncodingException Declared by
   *         {@link URLDecoder#decode(String, String)}.
   */
  public static void decodeFormVariables(String value, Scriptable result)
    throws java.io.UnsupportedEncodingException {
    if (value == null || value.length() == 0) {
      return;
    }

    for (String arg : value.split("&")) {
      if (arg.length() == 0) {
        // split() already drops trailing empty segments; skip leading and
        // inner ones as well instead of creating a bogus "_" property
        continue;
      }

      // Limit of 2 so that any further '=' stays part of the value
      String[] nv = arg.split("=", 2);
      String n = URLDecoder.decode(nv[0], "UTF-8").trim();
      String v = (nv.length == 2) ? URLDecoder.decode(nv[1], "UTF-8") : "";

      ScriptableObject.putProperty(result, makeXMLName(n, ""), v);
    }
  }

  /**
   * Converts a string into something usable as an XML name.
   *
   * Each character that is not a name character (letter, digit, '_', '.'
   * or '-') is replaced by <code>replacement</code>. If the result is
   * empty, or starts with a name character that may not begin a name
   * (digit, '.' or '-'), an underscore is prepended.
   *
   * @param s           The string to convert.
   * @param replacement The text substituted for each invalid character;
   *                    use "" to simply drop them.
   * @return The converted name; never empty.
   */
  public static String makeXMLName(String s, String replacement) {
    StringBuilder sb = new StringBuilder();

    for (int i = 0; i < s.length(); ++i) {
      char ch = s.charAt(i);

      if (isNameChar(ch)) {
        sb.append(ch);
      }
      else {
        sb.append(replacement);
      }
    }

    String name = sb.toString();

    // Add an underscore if first char is NameChar but not NameStartChar
    if (name.length() == 0 || (!isNameStartChar(name.charAt(0)) && isNameChar(name.charAt(0)))) {
      name = "_" + name;
    }

    return name;
  }

  /** Returns true if <code>ch</code> is a letter or '_'. */
  private static boolean isNameStartChar(char ch) {
    return (Character.isLetter(ch) || ch == '_');
  }

  /** Returns true if <code>ch</code> is a name start char, a digit, '.' or '-'. */
  private static boolean isNameChar(char ch) {
    return (isNameStartChar(ch) || Character.isDigit(ch) || ch == '.' || ch == '-');
  }

  /*
   * ECMA 3, 15.1.3 URI Handling Function Properties
   *
   * The following are implementations of the algorithms
   * given in the ECMA specification for the hidden functions
   * 'Encode' and 'Decode'.
   *
   * This method is taken from org.mozilla.javascript.NativeGlobal
   */

  /**
   * Percent-encodes a string as UTF-8.
   *
   * ASCII letters, digits and the characters <code>-_.!~*'()</code> are
   * never escaped. When <code>fullUri</code> is true, the reserved
   * characters <code>;/?:@&amp;=+$,#</code> are left unescaped as well.
   *
   * @param str     The string to encode.
   * @param fullUri True to keep the reserved URI characters unescaped.
   * @return The encoded string, or <code>str</code> itself if nothing
   *         had to be escaped.
   * @throws URISyntaxException If <code>str</code> contains an unpaired
   *         UTF-16 surrogate.
   */
  public static String encodeURI(String str, boolean fullUri)
    throws URISyntaxException {
    byte[] utf8buf = null;
    // Allocated lazily, on the first character that needs escaping
    StringBuilder sb = null;

    for (int k = 0, length = str.length(); k != length; ++k) {
      char C = str.charAt(k);

      if (encodeUnescaped(C, fullUri)) {
        if (sb != null) {
          sb.append(C);
        }
      } else {
        if (sb == null) {
          // Everything before position k was copied unchanged
          sb = new StringBuilder(length + 3);
          sb.append(str);
          sb.setLength(k);
          utf8buf = new byte[6];
        }

        // A low surrogate without a preceding high surrogate
        if (0xDC00 <= C && C <= 0xDFFF) {
          throw new URISyntaxException(str, "Illegal URI format");
        }

        int V;
        if (C < 0xD800 || 0xDBFF < C) {
          V = C;
        } else {
          // High surrogate: must be followed by a low surrogate
          k++;
          if (k == length) {
            throw new URISyntaxException(str, "Illegal URI format");
          }
          char C2 = str.charAt(k);
          if (!(0xDC00 <= C2 && C2 <= 0xDFFF)) {
            throw new URISyntaxException(str, "Illegal URI format");
          }
          V = ((C - 0xD800) << 10) + (C2 - 0xDC00) + 0x10000;
        }

        int L = oneUcs4ToUtf8Char(utf8buf, V);
        for (int j = 0; j < L; j++) {
          int d = 0xff & utf8buf[j];
          sb.append('%');
          sb.append(toHexChar(d >>> 4));
          sb.append(toHexChar(d & 0xf));
        }
      }
    }

    return (sb == null) ? str : sb.toString();
  }

  /**
   * Converts a value in the range 0-15 to an upper-case hex digit.
   *
   * @throws ESXXException If <code>i</code> is outside that range.
   */
  private static char toHexChar(int i) {
    if (i >> 4 != 0) {
      throw new ESXXException("Totally unexpected error in StringUtil.toHexChar()");
    }
    return (char)((i < 10) ? i + '0' : i - 10 + 'A');
  }

  /** Returns the value of a hex digit (either case), or -1 if it is not one. */
  private static int unHex(char c) {
    if ('A' <= c && c <= 'F') {
      return c - 'A' + 10;
    } else if ('a' <= c && c <= 'f') {
      return c - 'a' + 10;
    } else if ('0' <= c && c <= '9') {
      return c - '0';
    } else {
      return -1;
    }
  }

  /** Returns the byte value of two hex digits, or -1 if either is invalid. */
  private static int unHex(char c1, char c2) {
    int i1 = unHex(c1);
    int i2 = unHex(c2);
    if (i1 >= 0 && i2 >= 0) {
      return (i1 << 4) | i2;
    }
    return -1;
  }

  // This method is taken from org.mozilla.javascript.NativeGlobal
  /**
   * Decodes the percent-escapes in a string, interpreting escaped bytes
   * as UTF-8.
   *
   * When <code>fullUri</code> is true, escapes that decode to one of the
   * reserved characters <code>;/?:@&amp;=+$,#</code> are kept in their
   * original escaped form. Overlong sequences and the code points 0xFFFE
   * and 0xFFFF are replaced by U+FFFD.
   *
   * @param str     The string to decode.
   * @param fullUri True to leave escaped reserved characters untouched.
   * @return The decoded string, or <code>str</code> itself if it contains
   *         no '%'.
   * @throws URISyntaxException If an escape is truncated, has invalid hex
   *         digits, or is not a well-formed UTF-8 sequence.
   */
  public static String decodeURI(String str, boolean fullUri)
    throws URISyntaxException {
    char[] buf = null;
    int bufTop = 0;

    for (int k = 0, length = str.length(); k != length;) {
      char C = str.charAt(k);
      if (C != '%') {
        if (buf != null) {
          buf[bufTop++] = C;
        }
        ++k;
      } else {
        if (buf == null) {
          // decode always compresses, so the result can not be bigger
          // than str.length()
          buf = new char[length];
          str.getChars(0, k, buf, 0);
          bufTop = k;
        }
        int start = k;
        if (k + 3 > length) {
          throw new URISyntaxException(str, "Illegal URI format");
        }
        int B = unHex(str.charAt(k + 1), str.charAt(k + 2));
        if (B < 0) {
          throw new URISyntaxException(str, "Illegal URI format");
        }
        k += 3;
        if ((B & 0x80) == 0) {
          C = (char)B;
        } else {
          // Decode UTF-8 sequence into ucs4Char and encode it into
          // UTF-16
          int utf8Tail, ucs4Char, minUcs4Char;
          if ((B & 0xC0) == 0x80) {
            // First UTF-8 byte should be outside 0x80..0xBF
            throw new URISyntaxException(str, "Illegal URI format");
          } else if ((B & 0x20) == 0) {
            utf8Tail = 1; ucs4Char = B & 0x1F;
            minUcs4Char = 0x80;
          } else if ((B & 0x10) == 0) {
            utf8Tail = 2; ucs4Char = B & 0x0F;
            minUcs4Char = 0x800;
          } else if ((B & 0x08) == 0) {
            utf8Tail = 3; ucs4Char = B & 0x07;
            minUcs4Char = 0x10000;
          } else if ((B & 0x04) == 0) {
            utf8Tail = 4; ucs4Char = B & 0x03;
            minUcs4Char = 0x200000;
          } else if ((B & 0x02) == 0) {
            utf8Tail = 5; ucs4Char = B & 0x01;
            minUcs4Char = 0x4000000;
          } else {
            // First UTF-8 byte can not be 0xFF or 0xFE
            throw new URISyntaxException(str, "Illegal URI format");
          }
          if (k + 3 * utf8Tail > length) {
            throw new URISyntaxException(str, "Illegal URI format");
          }
          for (int j = 0; j != utf8Tail; j++) {
            if (str.charAt(k) != '%') {
              throw new URISyntaxException(str, "Illegal URI format");
            }
            B = unHex(str.charAt(k + 1), str.charAt(k + 2));
            if (B < 0 || (B & 0xC0) != 0x80) {
              throw new URISyntaxException(str, "Illegal URI format");
            }
            ucs4Char = (ucs4Char << 6) | (B & 0x3F);
            k += 3;
          }
          // Check for overlongs and other should-not-present codes
          if (ucs4Char < minUcs4Char
              || ucs4Char == 0xFFFE || ucs4Char == 0xFFFF)
          {
            ucs4Char = 0xFFFD;
          }
          if (ucs4Char >= 0x10000) {
            ucs4Char -= 0x10000;
            if (ucs4Char > 0xFFFFF) {
              throw new URISyntaxException(str, "Illegal URI format");
            }
            // Split into a surrogate pair; the high half is written
            // here, the low half goes through the common path below
            char H = (char)((ucs4Char >>> 10) + 0xD800);
            C = (char)((ucs4Char & 0x3FF) + 0xDC00);
            buf[bufTop++] = H;
          } else {
            C = (char)ucs4Char;
          }
        }
        if (fullUri && URI_DECODE_RESERVED.indexOf(C) >= 0) {
          // Keep the original escape sequence for reserved characters
          for (int x = start; x != k; x++) {
            buf[bufTop++] = str.charAt(x);
          }
        } else {
          buf[bufTop++] = C;
        }
      }
    }

    return (buf == null) ? str : new String(buf, 0, bufTop);
  }

  /**
   * Returns true if {@link #encodeURI} should copy <code>c</code> as-is:
   * ASCII letters and digits, the characters <code>-_.!~*'()</code> and,
   * when <code>fullUri</code> is true, the reserved URI characters.
   */
  private static boolean encodeUnescaped(char c, boolean fullUri) {
    if (('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z')
        || ('0' <= c && c <= '9'))
    {
      return true;
    }
    if ("-_.!~*'()".indexOf(c) >= 0) {
      return true;
    }
    if (fullUri) {
      return URI_DECODE_RESERVED.indexOf(c) >= 0;
    }
    return false;
  }

  /** Characters left alone by encodeURI/decodeURI when fullUri is true. */
  private static final String URI_DECODE_RESERVED = ";/?:@&=+$,#";

  /* Convert one UCS-4 char and write it into a UTF-8 buffer, which must be
   * at least 6 bytes long. Return the number of UTF-8 bytes of data written.
   */
  private static int oneUcs4ToUtf8Char(byte[] utf8Buffer, int ucs4Char) {
    int utf8Length = 1;

    //JS_ASSERT(ucs4Char <= 0x7FFFFFFF);
    if ((ucs4Char & ~0x7F) == 0) {
      utf8Buffer[0] = (byte)ucs4Char;
    }
    else {
      int i;
      int a = ucs4Char >>> 11;
      utf8Length = 2;
      // Each additional 5 bits of payload requires one more byte
      while (a != 0) {
        a >>>= 5;
        utf8Length++;
      }
      // Fill in the continuation bytes, last one first
      i = utf8Length;
      while (--i > 0) {
        utf8Buffer[i] = (byte)((ucs4Char & 0x3F) | 0x80);
        ucs4Char >>>= 6;
      }
      // Lead byte: utf8Length high bits set, followed by the remaining payload
      utf8Buffer[0] = (byte)(0x100 - (1 << (8-utf8Length)) + ucs4Char);
    }

    return utf8Length;
  }

  /**
   * Converts a value to a string whose lexicographic order matches the
   * natural order of the value.
   *
   * Numbers become 16 lower-case hex digits. Float and Double values use
   * their IEEE 754 bit pattern with the sign bit flipped for non-negative
   * values and all bits flipped for negative values. All other Number
   * types use <code>longValue()</code> with the sign bit flipped, so that
   * negative numbers sort before positive ones. The two encodings are not
   * comparable with each other. Any other object is converted with
   * <code>toString()</code>.
   *
   * @param o The value to convert; must not be null.
   * @return The sortable string.
   */
  public static String toSortable(Object o) {
    if (o instanceof Number) {
      long n;

      if (o instanceof Float || o instanceof Double) {
        n = Double.doubleToLongBits(((Number) o).doubleValue());
        n ^= n < 0 ? 0xffffffffffffffffL : 0x8000000000000000L;
      }
      else {
        // Flip the sign bit: the hex output is compared as an unsigned
        // value, and raw two's complement would sort negatives last
        n = ((Number) o).longValue() ^ 0x8000000000000000L;
      }

      return String.format(java.util.Locale.ROOT, "%016x", n);
    }
    else {
      return o.toString();
    }
  }
}