/* * Copyright 2009-2012 Alan Kennedy * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * */ package com.xhaus.jyson; import org.python.core.*; public class JysonDecoder { /** Controls whether the Jyson decoder accepts data other than object or array at the top level */ public boolean accept_any_primary_datum = false; /** Controls whether the Jyson decoder accepts dangling commas on arrays and dicts ("[1,2,3,]" or "{"hello":"world",}") */ public boolean accept_dangling_commas = false; /** Controls whether the Jyson decoder accepts shell style comments (" # This is a comment ") */ public boolean accept_shell_style_comments = false; /** Controls whether the Jyson decoder accepts quotes delimited with single quote characters ('is this a string?')*/ public boolean accept_single_quoted_strings = false; /** Controls whether the Jyson decoder accepts hexadecimal character escapes ("A" == "0x41") */ public boolean accept_hex_char_escapes = false; /** Controls whether the Jyson decoder accepts hexadecimal integer constants (255 == 0xFF == 0xff) */ public boolean accept_hexadecimal_integers = false; /** Controls whether the Jyson decoder accepts octal integer constants (0100 == 64 == 0x40) */ public boolean accept_octal_integers = false; /** Controls whether the Jyson decoder accepts extraneous data after primary expression */ public boolean accept_junk_after_data = false; protected int curr_pos; protected String json_text; protected JysonDecoder(String s) { curr_pos = 0; json_text = s; } private void reset_defaults() { accept_any_primary_datum = false; accept_dangling_commas = false; accept_shell_style_comments = false; accept_single_quoted_strings = false; accept_hex_char_escapes = false; accept_hexadecimal_integers = false; accept_octal_integers = false; accept_junk_after_data = false; } /** * Set the JysonDecoder into STRICT mode. In strict mode, the Jyson decoder will NOT accept * * <ol> * <li>A primary datum other than an object or an array ("{}" or "[]")</li> * <li>Dangling commas on objects or arrays ('{"1": 2,}' or "[1,]")</li> * <li>Shell style comments (" # This is a comment ")</li> * <li>Single quoted strings ('is this a string?')</li> * <li>Hexadecimal character escapes ("A" == "0x41")</li> * <li>Hexadecimal integer constants (255 == 0xFF == 0xff)</li> * <li>Octal integer constants (0100 == 64 == 0x40)</li> * <li>Junk after primary expression ('[1] "extra data"')</li> * </ol> * */ public void strict_mode() { reset_defaults(); } /** * Set the JysonDecoder into PERMISSIVE mode. In permissive mode, the Jyson decoder WILL accept * * <ol> * <li>Any primary datum, i.e. data that is not an object or an array ("1")</li> * <li>Dangling commas on objects or arrays ('{"1": 2,}' or "[1,]")</li> * <li>Shell style comments (" # This is a comment ")</li> * <li>Single quoted strings ('is this a string?')</li> * <li>Hexadecimal character escapes ("A" == "0x41")</li> * <li>Hexadecimal integer constants (255 == 0xFF == 0xff)</li> * <li>Octal integer constants (0100 == 64 == 0x40)</li> * <li>Junk after primary expression ('[1] "extra data"')</li> * </ol> * */ public void permissive_mode() { accept_any_primary_datum = true; accept_dangling_commas = true; accept_shell_style_comments = true; accept_single_quoted_strings = true; accept_hex_char_escapes = true; accept_hexadecimal_integers = true; accept_octal_integers = true; accept_junk_after_data = true; } protected void push() { if (curr_pos > 0) { curr_pos -= 1; } } protected char get_char ( ) { if (curr_pos < json_text.length()) return json_text.charAt(curr_pos++); else return 0; } protected String get_chars ( int n, String desc ) throws JSONDecodeError { try { String next = json_text.substring(curr_pos, curr_pos+n); curr_pos += n; return next; } catch (IndexOutOfBoundsException ioobe) { throw decode_exception("Ran out of characters reading "+desc); } } protected char get_data_char() throws JSONDecodeError { while (true) { char c = get_char(); switch (c) { case '/': switch (get_char()) { case '/': do { c = get_char(); } while (c != '\n' && c != '\r' && c != 0); while (c == '\n' || c == '\r') c = get_char(); if (c != 0) push(); break; case '*': while (true) { c = get_char(); if (c == 0) { throw decode_exception("Unclosed comment."); } if (c == '*') { if (get_char() == '/') { break; } push(); } } break; default: push(); return '/'; } break; case '#': if (accept_shell_style_comments) { do { c = get_char(); } while (c != '\n' && c != '\r' && c != 0); while (c == '\n' || c == '\r') c = get_char(); if (c != 0) push(); } else throw decode_exception("Shell style comments are not accepted"); break; case 0: return c; default: if (c > ' ') return c; } } } protected String get_string(char quote) throws JSONDecodeError { char c; StringBuffer buf = new StringBuffer(); while (true) { c = get_char(); switch (c) { case '\\': c = get_char(); switch (c) { case 'b': buf.append('\b'); break; case 'f': buf.append('\f'); break; case 'n': buf.append('\n'); break; case 'r': buf.append('\r'); break; case 't': buf.append('\t'); break; case '\\': buf.append('\\'); break; case '"': buf.append('"'); break; case '/': buf.append('/'); break; case 'u': String unichars = get_chars(4, "Unicode escape"); try { buf.append((char)Integer.parseInt(unichars, 16)); } catch (NumberFormatException nfx) { throw decode_exception("Illegal character in unicode hex constant: " + unichars); } break; case 'x' : if (accept_hex_char_escapes) buf.append((char) Integer.parseInt(get_chars(2, "Hexadecimal escape"), 16)); else throw decode_exception("Hexadecimal escapes for characters are not accepted"); break; default: throw decode_exception("Illegal escape character: '"+c+"'"); } break; case 0: case '\n': case '\r': throw decode_exception("Line terminators must be escaped inside strings"); case '\'': case '"': if (c == quote) { return buf.toString(); } // else let it flow into the default case default: buf.append(c); } } } protected PyObject decode_constant ( String s ) throws JSONDecodeError { if (s.compareTo("true") == 0) { return Py.True; } if (s.compareTo("false") == 0) { return Py.False; } if (s.compareTo("null") == 0) { return Py.None; } if (s.length() == 0) { throw decode_exception("No value specified"); } return null; } protected PyObject decode_number ( String s ) throws JSONDecodeError { char first = s.charAt(0); if (Character.isDigit(first) || ".-+".indexOf(first) != -1) { if (first == '0' && s.length() > 1) { if (s.charAt(1) == 'x' || s.charAt(1) == 'X') { if (accept_hexadecimal_integers) { String hexchars = s.substring(2); try { return new PyInteger(Integer.parseInt(hexchars, 16)); } catch (NumberFormatException nfx) { throw decode_exception("Format error in hexadecimal constant: " + hexchars); } } else throw decode_exception("Hexadecimal integers are not accepted."); } if (s.charAt(1) != '.') { if (accept_octal_integers) { try { return new PyInteger(Integer.parseInt(s, 8)); } catch (NumberFormatException nfx) { throw decode_exception("Format error in octal constant: " + s); } } else { throw decode_exception("Octal integers are not accepted."); } } } String possible_number = s; if (possible_number.charAt(0) == '+') possible_number = possible_number.substring(1); try { return new PyInteger(Integer.parseInt(possible_number)); } catch (Exception e) {} try { return new PyLong(possible_number); } catch (Exception e) {} try { double result = Double.parseDouble(possible_number); return new PyFloat(result); } catch (NumberFormatException nfx) {} } return null; } protected PyStringMap get_json_object( ) throws JSONDecodeError { char c; String key; PyStringMap json_object = new PyStringMap(); // PyStringMaps accept only string keys, like JSON while (true) { c = get_data_char(); switch (c) { case 0: throw decode_exception("A JSON object must end with '}'"); case '}': return json_object; case '\'': if (accept_single_quoted_strings) key = get_string('\''); else throw decode_exception("Single quoted strings are not acceptable in JSON"); break; case '"': key = get_string('"'); break; default: throw decode_exception("Only strings are acceptable as object keys in JSON"); } c = get_data_char(); if (c != ':') { throw decode_exception("Object keys and values must be separated by ':'"); } PyObject value = get_object(); json_object.__setitem__(new PyUnicode(key), value); switch (get_data_char()) { case ',': if (get_data_char() == '}') { if (accept_dangling_commas) return json_object; else throw decode_exception("Commas after last entry of object not accepted"); } push(); break; case '}': return json_object; default: throw decode_exception("Expected a ',' or '}'"); } } } protected PyList get_json_array () throws JSONDecodeError { char next = get_data_char(); if (next == 0) { throw decode_exception("Ran out of characters reading array"); } PyList json_array = new PyList(); if ( next == ']') { return json_array; } push(); while (true) { if (get_data_char() == ',') { throw decode_exception("Arrays may not contain consecutive or dangling commas"); } push(); json_array.append(get_object()); switch (get_data_char()) { case 0: { throw decode_exception("Ran out of characters reading array"); } case ',': if (get_data_char() == ']') { if (accept_dangling_commas) return json_array; else throw decode_exception("Commas after last element of array not accepted"); } push(); break; case ']': return json_array; default: throw decode_exception("Array elements must be followed by ',' or ']'"); } } } protected PyObject get_object() throws JSONDecodeError { char c = get_data_char(); switch (c) { case '{': return get_json_object(); case '[': return get_json_array(); case '"': return new PyUnicode(get_string(c)); case '\'': if (accept_single_quoted_strings) return new PyUnicode(get_string(c)); else throw decode_exception("Single quoted strings are not accepted"); } // OK, we have unquoted text. Try to figure out what to do with it StringBuffer buf = new StringBuffer(); while (c >= ' ' && ",:]}/\\[{#".indexOf(c) == -1) { buf.append(c); c = get_char(); } if (c != 0) push(); String s = buf.toString().trim(); // Check if it is one of the known constants PyObject result = decode_constant(s); if (result != null) return result; // Try to convert it to a number result = decode_number(s); if (result != null) return result; throw decode_exception("Unable to decode '"+s+"'"); } protected PyObject get_top_level_object ( ) throws JSONDecodeError { PyObject result = get_object(); if (!(result instanceof PyStringMap || result instanceof PyList) && !accept_any_primary_datum) throw decode_exception("JSON expressions must strictly be either objects or lists"); char ch = get_data_char(); if (ch != 0 && !accept_junk_after_data) throw decode_exception("Only whitespace is permitted after the primary datum: not '"+ch+"'"); return result; } protected JSONDecodeError decode_exception(String message) { return new JSONDecodeError(message+": position="+curr_pos); } }