/*
     ESXX - The friendly ECMAscript/XML Application Server
     Copyright (C) 2007-2015 Martin Blom <martin@blom.org>

     This program is free software: you can redistribute it and/or
     modify it under the terms of the GNU General Public License
     as published by the Free Software Foundation, either version 3
     of the License, or (at your option) any later version.

     This program is distributed in the hope that it will be useful,
     but WITHOUT ANY WARRANTY; without even the implied warranty of
     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     GNU General Public License for more details.

     You should have received a copy of the GNU General Public License
     along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/

package org.esxx.util;

import java.net.URISyntaxException;
import java.net.URLDecoder;
import java.net.URLEncoder;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.esxx.ESXXException;
import org.mozilla.javascript.Context;
import org.mozilla.javascript.Scriptable;
import org.mozilla.javascript.ScriptableObject;

/**
 * Static string helpers: {param} template expansion, form-variable
 * encoding/decoding, XML name sanitising, ECMAScript-style URI
 * encoding/decoding and sortable string keys.
 */
public abstract class StringUtil {
  /** Callback used by {@link #format} to turn a parameter name into its replacement text. */
  public interface ParamResolver {
    /**
     * @param param  The text found between '{' and '}' in the format string.
     * @return The text that should replace the whole {param} expression.
     */
    public String resolveParam(String param);
  }

  /**
   * Expands every {name} expression in a format string using a resolver.
   *
   * Text inside single quotes, back-quotes or double quotes is copied
   * verbatim (including the quote characters), so braces inside quoted
   * sections are not treated as parameters.
   *
   * @param format    The format string; may be null.
   * @param resolver  Called once per {name} occurrence with the name
   *                  (braces removed). Its result is handed to
   *                  Matcher.quoteReplacement(), so it is inserted
   *                  literally and should not be null.
   * @return The expanded string, or null if format was null.
   */
  public static String format(String format, ParamResolver resolver) {
    if (format == null) {
      return null;
    }

    StringBuffer s = new StringBuffer();
    Matcher m = paramPattern.matcher(format);

    while (m.find()) {
      String g = m.group();

      if (m.start(1) != -1) {
        // Match on group 1, which is our parameter pattern; strip the
        // braces and substitute whatever the resolver returns
        m.appendReplacement(s, Matcher.quoteReplacement(resolver.resolveParam(g.substring(1, g.length() - 1))));
      } else {
        // Match on quoted strings, which we just copy as-is
        m.appendReplacement(s, Matcher.quoteReplacement(g));
      }
    }

    m.appendTail(s);

    return s.toString();
  }

  // TODO: Consider replacing all this with just \{ escape notation instead

  // Quoted sections ('...', `...` and "..."), each allowing a
  // backslash-escaped quote character inside
  private static final String quotePattern1 = "('((\\\\')|[^'])+')";
  private static final String quotePattern2 = "(`((\\\\`)|[^`])+`)";
  private static final String quotePattern3 = "(\"((\\\\\")|[^\"])+\")";

  private static final Pattern paramPattern = Pattern.compile("(\\{[^\\}]+\\})" + // Group 1: Matches {identifier}
                                                              "|" + quotePattern1 +
                                                              "|" + quotePattern2 +
                                                              "|" + quotePattern3);

  /**
   * Serialises the properties of a JavaScript object as a
   * "key=value&amp;key=value" string.
   *
   * String keys and all values are URL-encoded with the given charset;
   * numeric (index) keys are written as plain decimal numbers.
   *
   * @param cs      The charset name passed to URLEncoder.encode().
   * @param values  The object whose ids (as returned by getIds()) are encoded.
   * @return The encoded string; empty if the object has no ids.
   * @throws java.io.UnsupportedEncodingException If cs is not supported.
   */
  public static String encodeFormVariables(String cs, Scriptable values)
    throws java.io.UnsupportedEncodingException {
    StringBuilder sb = new StringBuilder();

    for (Object o : values.getIds()) {
      if (sb.length() != 0) {
        sb.append("&");
      }

      if (o instanceof String) {
        String key   = (String) o;
        String value = Context.toString(values.get(key, values));

        sb.append(URLEncoder.encode(key, cs));
        sb.append("=");
        sb.append(URLEncoder.encode(value, cs));
      } else {
        // Ids that are not strings are treated as integer indices
        int    key   = (Integer) o;
        String value = Context.toString(values.get(key, values));

        sb.append(key).append("=");
        sb.append(URLEncoder.encode(value, cs));
      }
    }

    return sb.toString();
  }

  /**
   * Parses a "name=value&amp;name=value" string and stores each pair as a
   * property on a JavaScript object.
   *
   * Names and values are URL-decoded as UTF-8. Names are trimmed and
   * then passed through makeXMLName(name, ""), so characters that are
   * not valid in a name are dropped. A pair without '=' gets the empty
   * string as its value.
   *
   * @param value   The encoded string; an empty string is a no-op.
   * @param result  The object that receives the properties.
   * @throws java.io.UnsupportedEncodingException Declared because of URLDecoder.decode().
   */
  public static void decodeFormVariables(String value, Scriptable result)
    throws java.io.UnsupportedEncodingException {
    if (value.length() > 0) {
      String[] args = value.split("&");

      for (String arg : args) {
        // Limit of 2, so that any further '=' stays part of the value
        String[] nv = arg.split("=", 2);
        String   n  = URLDecoder.decode(nv[0], "UTF-8").trim();

        if (nv.length == 1) {
          ScriptableObject.putProperty(result, makeXMLName(n, ""), "");
        } else if (nv.length == 2) {
          String v = URLDecoder.decode(nv[1], "UTF-8");

          ScriptableObject.putProperty(result, makeXMLName(n, ""), v);
        }
      }
    }
  }

  /**
   * Turns an arbitrary string into something usable as an XML name.
   *
   * Every character that is not a name character (letter, digit, '_',
   * '.' or '-') is substituted with the replacement string. If the
   * result is empty, or begins with a character that is allowed in a
   * name but not as its first character, an underscore is prepended.
   *
   * @param s            The string to convert.
   * @param replacement  Inserted in place of each rejected character.
   * @return The converted name.
   */
  public static String makeXMLName(String s, String replacement) {
    StringBuilder sb = new StringBuilder();

    for (int i = 0; i < s.length(); ++i) {
      if (!isNameChar(s.charAt(i))) {
        sb.append(replacement);
      } else {
        sb.append(s.charAt(i));
      }
    }

    String name = sb.toString();

    // Add an underscore if first char is NameChar but not NameStartChar
    if (name.length() == 0 || (!isNameStartChar(name.charAt(0)) && isNameChar(name.charAt(0)))) {
      name = "_" + name;
    }

    return name;
  }

  /** Returns true for characters accepted at the start of a name: letters and '_'. */
  private static boolean isNameStartChar(char ch) {
    return (Character.isLetter(ch) || ch == '_');
  }

  /** Returns true for characters accepted anywhere in a name: name-start characters, digits, '.' and '-'. */
  private static boolean isNameChar(char ch) {
    return (isNameStartChar(ch) || Character.isDigit(ch) || ch == '.' || ch == '-');
  }

  /*
   *   ECMA 3, 15.1.3 URI Handling Function Properties
   *
   *   The following are implementations of the algorithms
   *   given in the ECMA specification for the hidden functions
   *   'Encode' and 'Decode'.
   *
   *   This method is taken from org.mozilla.javascript.NativeGlobal
   */

  /**
   * Percent-encodes a string as UTF-8.
   *
   * ASCII letters, digits and the characters -_.!~*'() are never
   * escaped. When fullUri is true, the reserved characters
   * ;/?:@&amp;=+$,# are left as they are too.
   *
   * @param str      The string to encode.
   * @param fullUri  true to keep reserved URI characters unescaped.
   * @return The encoded string; str itself if nothing had to be escaped.
   * @throws URISyntaxException If str contains an unpaired surrogate.
   */
  public static String encodeURI(String str, boolean fullUri)
    throws URISyntaxException {
    byte[] utf8buf = null;
    StringBuffer sb = null;

    for (int k = 0, length = str.length(); k != length; ++k) {
      char C = str.charAt(k);

      if (encodeUnescaped(C, fullUri)) {
        if (sb != null) {
          sb.append(C);
        }
      } else {
        if (sb == null) {
          // First character that needs escaping: allocate the output
          // buffer lazily and seed it with the untouched prefix
          sb = new StringBuffer(length + 3);
          sb.append(str);
          sb.setLength(k);
          utf8buf = new byte[6];
        }

        // A low surrogate without a preceding high surrogate
        if (0xDC00 <= C && C <= 0xDFFF) {
          throw new URISyntaxException(str, "Illegal URI format");
        }

        int V;

        if (C < 0xD800 || 0xDBFF < C) {
          V = C;
        } else {
          // High surrogate: combine it with the low surrogate that must follow
          k++;

          if (k == length) {
            throw new URISyntaxException(str, "Illegal URI format");
          }

          char C2 = str.charAt(k);

          if (!(0xDC00 <= C2 && C2 <= 0xDFFF)) {
            throw new URISyntaxException(str, "Illegal URI format");
          }

          V = ((C - 0xD800) << 10) + (C2 - 0xDC00) + 0x10000;
        }

        int L = oneUcs4ToUtf8Char(utf8buf, V);

        for (int j = 0; j < L; j++) {
          int d = 0xff & utf8buf[j];

          sb.append('%');
          sb.append(toHexChar(d >>> 4));
          sb.append(toHexChar(d & 0xf));
        }
      }
    }

    return (sb == null) ? str : sb.toString();
  }

  /**
   * Converts a value in the range 0..15 to an upper-case hex digit.
   *
   * @throws ESXXException If i is outside that range.
   */
  private static char toHexChar(int i) {
    if (i >> 4 != 0) {
      throw new ESXXException("Totally unexpected error in StringUtil.toHexChar()");
    }

    return (char)((i < 10) ? i + '0' : i - 10 + 'A');
  }

  /** Returns the value of a hex digit (either case), or -1 if c is not a hex digit. */
  private static int unHex(char c) {
    if ('A' <= c && c <= 'F') {
      return c - 'A' + 10;
    } else if ('a' <= c && c <= 'f') {
      return c - 'a' + 10;
    } else if ('0' <= c && c <= '9') {
      return c - '0';
    } else {
      return -1;
    }
  }

  /** Returns the byte value of two hex digits, or -1 if either one is not a hex digit. */
  private static int unHex(char c1, char c2) {
    int i1 = unHex(c1);
    int i2 = unHex(c2);

    if (i1 >= 0 && i2 >= 0) {
      return (i1 << 4) | i2;
    }

    return -1;
  }

  // This method is taken from org.mozilla.javascript.NativeGlobal

  /**
   * Decodes percent-escapes, interpreting the escaped bytes as UTF-8.
   *
   * When fullUri is true, escapes that decode to one of the reserved
   * characters ;/?:@&amp;=+$,# are copied to the result in their escaped
   * form. Overlong sequences and the code points 0xFFFE/0xFFFF are
   * replaced by U+FFFD.
   *
   * @param str      The string to decode.
   * @param fullUri  true to keep escaped reserved characters escaped.
   * @return The decoded string; str itself if it contained no '%'.
   * @throws URISyntaxException If an escape is truncated, is not valid
   *                            hex, or does not form a valid UTF-8
   *                            sequence.
   */
  public static String decodeURI(String str, boolean fullUri)
    throws URISyntaxException {
    char[] buf = null;
    int bufTop = 0;

    for (int k = 0, length = str.length(); k != length;) {
      char C = str.charAt(k);

      if (C != '%') {
        if (buf != null) {
          buf[bufTop++] = C;
        }
        ++k;
      } else {
        if (buf == null) {
          // decode always compress so result can not be bigger than
          // str.length()
          buf = new char[length];
          str.getChars(0, k, buf, 0);
          bufTop = k;
        }

        // Remember where the escape began, in case it must be copied verbatim
        int start = k;

        if (k + 3 > length) {
          throw new URISyntaxException(str, "Illegal URI format");
        }

        int B = unHex(str.charAt(k + 1), str.charAt(k + 2));

        if (B < 0) {
          throw new URISyntaxException(str, "Illegal URI format");
        }

        k += 3;

        if ((B & 0x80) == 0) {
          C = (char)B;
        } else {
          // Decode UTF-8 sequence into ucs4Char and encode it into
          // UTF-16
          int utf8Tail, ucs4Char, minUcs4Char;

          if ((B & 0xC0) == 0x80) {
            // First UTF-8 should be outside 0x80..0xBF
            throw new URISyntaxException(str, "Illegal URI format");
          } else if ((B & 0x20) == 0) {
            utf8Tail = 1;
            ucs4Char = B & 0x1F;
            minUcs4Char = 0x80;
          } else if ((B & 0x10) == 0) {
            utf8Tail = 2;
            ucs4Char = B & 0x0F;
            minUcs4Char = 0x800;
          } else if ((B & 0x08) == 0) {
            utf8Tail = 3;
            ucs4Char = B & 0x07;
            minUcs4Char = 0x10000;
          } else if ((B & 0x04) == 0) {
            utf8Tail = 4;
            ucs4Char = B & 0x03;
            minUcs4Char = 0x200000;
          } else if ((B & 0x02) == 0) {
            utf8Tail = 5;
            ucs4Char = B & 0x01;
            minUcs4Char = 0x4000000;
          } else {
            // First UTF-8 can not be 0xFF or 0xFE
            throw new URISyntaxException(str, "Illegal URI format");
          }

          // Every continuation byte must be present as a full %XX escape
          if (k + 3 * utf8Tail > length) {
            throw new URISyntaxException(str, "Illegal URI format");
          }

          for (int j = 0; j != utf8Tail; j++) {
            if (str.charAt(k) != '%') {
              throw new URISyntaxException(str, "Illegal URI format");
            }

            B = unHex(str.charAt(k + 1), str.charAt(k + 2));

            if (B < 0 || (B & 0xC0) != 0x80) {
              throw new URISyntaxException(str, "Illegal URI format");
            }

            ucs4Char = (ucs4Char << 6) | (B & 0x3F);
            k += 3;
          }

          // Check for overlongs and other should-not-present codes
          if (ucs4Char < minUcs4Char || ucs4Char == 0xFFFE || ucs4Char == 0xFFFF) {
            ucs4Char = 0xFFFD;
          }

          if (ucs4Char >= 0x10000) {
            ucs4Char -= 0x10000;

            if (ucs4Char > 0xFFFFF) {
              throw new URISyntaxException(str, "Illegal URI format");
            }

            // Emit the high surrogate now; the low surrogate is written
            // below as C
            char H = (char)((ucs4Char >>> 10) + 0xD800);
            C = (char)((ucs4Char & 0x3FF) + 0xDC00);
            buf[bufTop++] = H;
          } else {
            C = (char)ucs4Char;
          }
        }

        if (fullUri && URI_DECODE_RESERVED.indexOf(C) >= 0) {
          // Reserved character: keep the original escape text
          for (int x = start; x != k; x++) {
            buf[bufTop++] = str.charAt(x);
          }
        } else {
          buf[bufTop++] = C;
        }
      }
    }

    return (buf == null) ? str : new String(buf, 0, bufTop);
  }

  /**
   * Tells whether encodeURI() should copy a character unescaped.
   *
   * @param c        The character to test.
   * @param fullUri  true if the reserved characters in
   *                 URI_DECODE_RESERVED should be left unescaped too.
   */
  private static boolean encodeUnescaped(char c, boolean fullUri) {
    if (('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z') || ('0' <= c && c <= '9')) {
      return true;
    }

    if ("-_.!~*'()".indexOf(c) >= 0) {
      return true;
    }

    if (fullUri) {
      return URI_DECODE_RESERVED.indexOf(c) >= 0;
    }

    return false;
  }

  /** Characters that are kept as-is (encode) or kept escaped (decode) in full-URI mode. */
  private static final String URI_DECODE_RESERVED = ";/?:@&=+$,#";

  /* Convert one UCS-4 char and write it into a UTF-8 buffer, which must be
   * at least 6 bytes long.  Return the number of UTF-8 bytes of data written.
   */
  private static int oneUcs4ToUtf8Char(byte[] utf8Buffer, int ucs4Char) {
    int utf8Length = 1;

    //JS_ASSERT(ucs4Char <= 0x7FFFFFFF);
    if ((ucs4Char & ~0x7F) == 0) {
      utf8Buffer[0] = (byte)ucs4Char;
    } else {
      int i;
      int a = ucs4Char >>> 11;

      // Two bytes hold 11 bits; each additional byte adds 5 more
      utf8Length = 2;

      while (a != 0) {
        a >>>= 5;
        utf8Length++;
      }

      // Fill the continuation bytes from the end, 6 bits at a time
      i = utf8Length;

      while (--i > 0) {
        utf8Buffer[i] = (byte)((ucs4Char & 0x3F) | 0x80);
        ucs4Char >>>= 6;
      }

      // Lead byte: utf8Length high bits set, followed by the remaining bits
      utf8Buffer[0] = (byte)(0x100 - (1 << (8-utf8Length)) + ucs4Char);
    }

    return utf8Length;
  }

  /**
   * Converts a value into a string key whose lexicographic order
   * follows the natural order of the value.
   *
   * Numbers become 16 lower-case hex digits. Float and Double values
   * are converted via their IEEE 754 bit pattern; all other Number
   * types via longValue(). In both cases the bits are remapped so that
   * negative values sort before positive ones. Keys produced from
   * floating-point values and keys produced from integral values use
   * different scales and are not comparable with each other.
   *
   * Anything that is not a Number is converted using toString().
   *
   * @param o  The value to convert; must not be null.
   * @return The sortable string.
   */
  public static String toSortable(Object o) {
    if (o instanceof Number) {
      long n;

      if (o instanceof Float || o instanceof Double) {
        n = Double.doubleToLongBits(((Number) o).doubleValue());
        // Negative: invert all bits (larger magnitude sorts first).
        // Non-negative: set the sign bit, placing it above all negatives.
        n ^= n < 0 ? 0xffffffffffffffffL : 0x8000000000000000L;
      } else {
        // Flip the sign bit so that the unsigned hex representation
        // orders negative values before non-negative ones; raw
        // two's-complement hex would sort -1 ("ffff...") after 1
        n = ((Number) o).longValue() ^ 0x8000000000000000L;
      }

      return String.format(java.util.Locale.ROOT, "%016x", n);
    } else {
      return o.toString();
    }
  }
}