/* * plist - An open source library to parse and generate property lists * Copyright (C) 2014 Daniel Dreibrodt * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ package com.dd.plist; import java.io.ByteArrayOutputStream; import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; import java.io.UnsupportedEncodingException; import java.nio.CharBuffer; import java.nio.charset.CharacterCodingException; import java.nio.charset.Charset; import java.nio.charset.CharsetEncoder; import java.text.ParseException; import java.text.StringCharacterIterator; import java.util.LinkedList; import java.util.List; /** * Parser for ASCII property lists. Supports Apple OS X/iOS and GnuStep/NeXTSTEP format. * This parser is based on the recursive descent paradigm, but the underlying grammar * is not explicitely defined. * <p/> * Resources on ASCII property list format: * <ul> * <li><a href="https://developer.apple.com/library/mac/#documentation/Cocoa/Conceptual/PropertyLists/OldStylePlists/OldStylePLists.html> * Property List Programming Guide - Old-Style ASCII Property Lists * </a></li> * <li><a href="http://www.gnustep.org/resources/documentation/Developer/Base/Reference/NSPropertyList.html"> * GnuStep - NSPropertyListSerialization class documentation * </a></li> * </ul> * * @author Daniel Dreibrodt */ public class ASCIIPropertyListParser { /** * Parses an ASCII property list file. * * @param f The ASCII property list file. * @return The root object of the property list. This is usally a NSDictionary but can also be a NSArray. * @throws Exception When an error occurs during parsing. */ public static NSObject parse(File f) throws IOException, ParseException { return parse(new FileInputStream(f)); } /** * Parses an ASCII property list from an input stream. * * @param in The input stream that points to the property list's data. * @return The root object of the property list. This is usally a NSDictionary but can also be a NSArray. * @throws Exception When an error occurs during parsing. */ public static NSObject parse(InputStream in) throws ParseException, IOException { byte[] buf = PropertyListParser.readAll(in); in.close(); return parse(buf); } /** * Parses an ASCII property list from a byte array. * * @param bytes The ASCII property list data. * @return The root object of the property list. This is usally a NSDictionary but can also be a NSArray. * @throws Exception When an error occurs during parsing. */ public static NSObject parse(byte[] bytes) throws ParseException { ASCIIPropertyListParser parser = new ASCIIPropertyListParser(bytes); return parser.parse(); } public static final char WHITESPACE_SPACE = ' '; public static final char WHITESPACE_TAB = '\t'; public static final char WHITESPACE_NEWLINE = '\n'; public static final char WHITESPACE_CARRIAGE_RETURN = '\r'; public static final char ARRAY_BEGIN_TOKEN = '('; public static final char ARRAY_END_TOKEN = ')'; public static final char ARRAY_ITEM_DELIMITER_TOKEN = ','; public static final char DICTIONARY_BEGIN_TOKEN = '{'; public static final char DICTIONARY_END_TOKEN = '}'; public static final char DICTIONARY_ASSIGN_TOKEN = '='; public static final char DICTIONARY_ITEM_DELIMITER_TOKEN = ';'; public static final char QUOTEDSTRING_BEGIN_TOKEN = '"'; public static final char QUOTEDSTRING_END_TOKEN = '"'; public static final char QUOTEDSTRING_ESCAPE_TOKEN = '\\'; public static final char DATA_BEGIN_TOKEN = '<'; public static final char DATA_END_TOKEN = '>'; public static final char DATA_GSOBJECT_BEGIN_TOKEN = '*'; public static final char DATA_GSDATE_BEGIN_TOKEN = 'D'; public static final char DATA_GSBOOL_BEGIN_TOKEN = 'B'; public static final char DATA_GSBOOL_TRUE_TOKEN = 'Y'; public static final char DATA_GSBOOL_FALSE_TOKEN = 'N'; public static final char DATA_GSINT_BEGIN_TOKEN = 'I'; public static final char DATA_GSREAL_BEGIN_TOKEN = 'R'; public static final char DATE_DATE_FIELD_DELIMITER = '-'; public static final char DATE_TIME_FIELD_DELIMITER = ':'; public static final char DATE_GS_DATE_TIME_DELIMITER = ' '; public static final char DATE_APPLE_DATE_TIME_DELIMITER = 'T'; public static final char DATE_APPLE_END_TOKEN = 'Z'; public static final char COMMENT_BEGIN_TOKEN = '/'; public static final char MULTILINE_COMMENT_SECOND_TOKEN = '*'; public static final char SINGLELINE_COMMENT_SECOND_TOKEN = '/'; public static final char MULTILINE_COMMENT_END_TOKEN = '/'; /** * Property list source data */ private byte[] data; /** * Current parsing index */ private int index; /** * Only allow subclasses to change instantiation. */ protected ASCIIPropertyListParser() { } /** * Creates a new parser for the given property list content. * * @param propertyListContent The content of the property list that is to be parsed. */ private ASCIIPropertyListParser(byte[] propertyListContent) { data = propertyListContent; } /** * Checks whether the given sequence of symbols can be accepted. * * @param sequence The sequence of tokens to look for. * @return Whether the given tokens occur at the current parsing position. */ private boolean acceptSequence(char... sequence) { for (int i = 0; i < sequence.length; i++) { if (data[index + i] != sequence[i]) return false; } return true; } /** * Checks whether the given symbols can be accepted, that is, if one * of the given symbols is found at the current parsing position. * * @param acceptableSymbols The symbols to check. * @return Whether one of the symbols can be accepted or not. */ private boolean accept(char... acceptableSymbols) { boolean symbolPresent = false; for (char c : acceptableSymbols) { if (data[index] == c) symbolPresent = true; } return symbolPresent; } /** * Checks whether the given symbol can be accepted, that is, if * the given symbols is found at the current parsing position. * * @param acceptableSymbol The symbol to check. * @return Whether the symbol can be accepted or not. */ private boolean accept(char acceptableSymbol) { return data[index] == acceptableSymbol; } /** * Expects the input to have one of the given symbols at the current parsing position. * * @param expectedSymbols The expected symbols. * @throws ParseException If none of the expected symbols could be found. */ private void expect(char... expectedSymbols) throws ParseException { if (!accept(expectedSymbols)) { String excString = "Expected '" + expectedSymbols[0] + "'"; for (int i = 1; i < expectedSymbols.length; i++) { excString += " or '" + expectedSymbols[i] + "'"; } excString += " but found '" + (char) data[index] + "'"; throw new ParseException(excString, index); } } /** * Expects the input to have the given symbol at the current parsing position. * * @param expectedSymbol The expected symbol. * @throws ParseException If the expected symbol could be found. */ private void expect(char expectedSymbol) throws ParseException { if (!accept(expectedSymbol)) throw new ParseException("Expected '" + expectedSymbol + "' but found '" + (char) data[index] + "'", index); } /** * Reads an expected symbol. * * @param symbol The symbol to read. * @throws ParseException If the expected symbol could not be read. */ private void read(char symbol) throws ParseException { expect(symbol); index++; } /** * Skips the current symbol. */ private void skip() { index++; } /** * Skips several symbols * * @param numSymbols The amount of symbols to skip. */ private void skip(int numSymbols) { index += numSymbols; } /** * Skips all whitespaces and comments from the current parsing position onward. */ private void skipWhitespacesAndComments() { boolean commentSkipped; do { commentSkipped = false; //Skip whitespaces while (accept(WHITESPACE_CARRIAGE_RETURN, WHITESPACE_NEWLINE, WHITESPACE_SPACE, WHITESPACE_TAB)) { skip(); } //Skip single line comments "//..." if (acceptSequence(COMMENT_BEGIN_TOKEN, SINGLELINE_COMMENT_SECOND_TOKEN)) { skip(2); readInputUntil(WHITESPACE_CARRIAGE_RETURN, WHITESPACE_NEWLINE); commentSkipped = true; } //Skip multi line comments "/* ... */" else if (acceptSequence(COMMENT_BEGIN_TOKEN, MULTILINE_COMMENT_SECOND_TOKEN)) { skip(2); while (true) { if (acceptSequence(MULTILINE_COMMENT_SECOND_TOKEN, MULTILINE_COMMENT_END_TOKEN)) { skip(2); break; } skip(); } commentSkipped = true; } } while (commentSkipped); //if a comment was skipped more whitespace or another comment can follow, so skip again } private String toUtf8String(ByteArrayOutputStream stream) { try { return stream.toString("UTF-8"); } catch (UnsupportedEncodingException e) { throw new RuntimeException(e); } } /** * Reads input until one of the given symbols is found. * * @param symbols The symbols that can occur after the string to read. * @return The input until one the given symbols. */ private String readInputUntil(char... symbols) { ByteArrayOutputStream stringBytes = new ByteArrayOutputStream(); while (!accept(symbols)) { stringBytes.write(data[index]); skip(); } return toUtf8String(stringBytes); } /** * Reads input until the given symbol is found. * * @param symbol The symbol that can occur after the string to read. * @return The input until the given symbol. */ private String readInputUntil(char symbol) { ByteArrayOutputStream stringBytes = new ByteArrayOutputStream(); while (!accept(symbol)) { stringBytes.write(data[index]); skip(); } return toUtf8String(stringBytes); } /** * Parses the property list from the beginning and returns the root object * of the property list. * * @return The root object of the property list. This can either be a NSDictionary or a NSArray. * @throws ParseException When an error occured during parsing */ public NSObject parse() throws ParseException { index = 0; skipWhitespacesAndComments(); expect(DICTIONARY_BEGIN_TOKEN, ARRAY_BEGIN_TOKEN, COMMENT_BEGIN_TOKEN); try { return parseObject(); } catch (ArrayIndexOutOfBoundsException ex) { throw new ParseException("Reached end of input unexpectedly.", index); } } /** * Parses the NSObject found at the current position in the property list * data stream. * * @return The parsed NSObject. * @see ASCIIPropertyListParser#index */ private NSObject parseObject() throws ParseException { switch (data[index]) { case ARRAY_BEGIN_TOKEN: { return parseArray(); } case DICTIONARY_BEGIN_TOKEN: { return parseDictionary(); } case DATA_BEGIN_TOKEN: { return parseData(); } case QUOTEDSTRING_BEGIN_TOKEN: { String quotedString = parseQuotedString(); //apple dates are quoted strings of length 20 and after the 4 year digits a dash is found if (quotedString.length() == 20 && quotedString.charAt(4) == DATE_DATE_FIELD_DELIMITER) { try { return new NSDate(quotedString); } catch (Exception ex) { //not a date? --> return string return new NSString(quotedString); } } else { return new NSString(quotedString); } } default: { //0-9 if (data[index] > 0x2F && data[index] < 0x3A) { //could be a date or just a string return parseDateString(); } else { //non-numerical -> string or boolean String parsedString = parseString(); return new NSString(parsedString); } } } } /** * Parses an array from the current parsing position. * The prerequisite for calling this method is, that an array begin token has been read. * * @return The array found at the parsing position. */ private NSArray parseArray() throws ParseException { //Skip begin token skip(); skipWhitespacesAndComments(); List<NSObject> objects = new LinkedList<NSObject>(); while (!accept(ARRAY_END_TOKEN)) { objects.add(parseObject()); skipWhitespacesAndComments(); if (accept(ARRAY_ITEM_DELIMITER_TOKEN)) { skip(); } else { break; //must have reached end of array } skipWhitespacesAndComments(); } //parse end token read(ARRAY_END_TOKEN); return new NSArray(objects.toArray(new NSObject[objects.size()])); } /** * Parses a dictionary from the current parsing position. * The prerequisite for calling this method is, that a dictionary begin token has been read. * * @return The dictionary found at the parsing position. */ private NSDictionary parseDictionary() throws ParseException { //Skip begin token skip(); skipWhitespacesAndComments(); NSDictionary dict = new NSDictionary(); while (!accept(DICTIONARY_END_TOKEN)) { //Parse key String keyString; if (accept(QUOTEDSTRING_BEGIN_TOKEN)) { keyString = parseQuotedString(); } else { keyString = parseString(); } skipWhitespacesAndComments(); //Parse assign token read(DICTIONARY_ASSIGN_TOKEN); skipWhitespacesAndComments(); NSObject object = parseObject(); dict.put(keyString, object); skipWhitespacesAndComments(); read(DICTIONARY_ITEM_DELIMITER_TOKEN); skipWhitespacesAndComments(); } //skip end token skip(); return dict; } /** * Parses a data object from the current parsing position. * This can either be a NSData object or a GnuStep NSNumber or NSDate. * The prerequisite for calling this method is, that a data begin token has been read. * * @return The data object found at the parsing position. */ private NSObject parseData() throws ParseException { NSObject obj = null; //Skip begin token skip(); if (accept(DATA_GSOBJECT_BEGIN_TOKEN)) { skip(); expect(DATA_GSBOOL_BEGIN_TOKEN, DATA_GSDATE_BEGIN_TOKEN, DATA_GSINT_BEGIN_TOKEN, DATA_GSREAL_BEGIN_TOKEN); if (accept(DATA_GSBOOL_BEGIN_TOKEN)) { //Boolean skip(); expect(DATA_GSBOOL_TRUE_TOKEN, DATA_GSBOOL_FALSE_TOKEN); if (accept(DATA_GSBOOL_TRUE_TOKEN)) { obj = new NSNumber(true); } else { obj = new NSNumber(false); } //Skip the parsed boolean token skip(); } else if (accept(DATA_GSDATE_BEGIN_TOKEN)) { //Date skip(); String dateString = readInputUntil(DATA_END_TOKEN); obj = new NSDate(dateString); } else if (accept(DATA_GSINT_BEGIN_TOKEN, DATA_GSREAL_BEGIN_TOKEN)) { //Number skip(); String numberString = readInputUntil(DATA_END_TOKEN); obj = new NSNumber(numberString); } //parse data end token read(DATA_END_TOKEN); } else { String dataString = readInputUntil(DATA_END_TOKEN); dataString = dataString.replaceAll("\\s+", ""); int numBytes = dataString.length() / 2; byte[] bytes = new byte[numBytes]; for (int i = 0; i < bytes.length; i++) { String byteString = dataString.substring(i * 2, i * 2 + 2); int byteValue = Integer.parseInt(byteString, 16); bytes[i] = (byte) byteValue; } obj = new NSData(bytes); //skip end token skip(); } return obj; } /** * Attempts to parse a plain string as a date if possible. * * @return A NSDate if the string represents such an object. Otherwise a NSString is returned. */ private NSObject parseDateString() { String numericalString = parseString(); if (numericalString.length() > 4 && numericalString.charAt(4) == DATE_DATE_FIELD_DELIMITER) { try { return new NSDate(numericalString); } catch(Exception ex) { //An exception occurs if the string is not a date but just a string } } return new NSString(numericalString); } /** * Parses a plain string from the current parsing position. * The string is made up of all characters to the next whitespace, delimiter token or assignment token. * * @return The string found at the current parsing position. */ private String parseString() { return readInputUntil(WHITESPACE_SPACE, WHITESPACE_TAB, WHITESPACE_NEWLINE, WHITESPACE_CARRIAGE_RETURN, ARRAY_ITEM_DELIMITER_TOKEN, DICTIONARY_ITEM_DELIMITER_TOKEN, DICTIONARY_ASSIGN_TOKEN, ARRAY_END_TOKEN); } /** * Parses a quoted string from the current parsing position. * The prerequisite for calling this method is, that a quoted string begin token has been read. * * @return The quoted string found at the parsing method with all special characters unescaped. * @throws ParseException If an error occured during parsing. */ private String parseQuotedString() throws ParseException { //Skip begin token skip(); ByteArrayOutputStream quotedString = new ByteArrayOutputStream(); boolean unescapedBackslash = true; //Read from opening quotation marks to closing quotation marks and skip escaped quotation marks while (data[index] != QUOTEDSTRING_END_TOKEN || (data[index - 1] == QUOTEDSTRING_ESCAPE_TOKEN && unescapedBackslash)) { quotedString.write(data[index]); if (accept(QUOTEDSTRING_ESCAPE_TOKEN)) { unescapedBackslash = !(data[index - 1] == QUOTEDSTRING_ESCAPE_TOKEN && unescapedBackslash); } skip(); } String unescapedString; try { unescapedString = parseQuotedString(toUtf8String(quotedString)); } catch (Exception ex) { throw new ParseException("The quoted string could not be parsed.", index); } //skip end token skip(); return unescapedString; } /** * Used to encode the parsed strings */ private static CharsetEncoder asciiEncoder; /** * Parses a string according to the format specified for ASCII property lists. * Such strings can contain escape sequences which are unescaped in this method. * * @param s The escaped string according to the ASCII property list format, without leading and trailing quotation marks. * @return The unescaped string in UTF-8 or ASCII format, depending on the contained characters. * @throws Exception If the string could not be properly parsed. */ public static synchronized String parseQuotedString(String s) throws UnsupportedEncodingException, CharacterCodingException { StringBuilder parsed = new StringBuilder(); StringCharacterIterator iterator = new StringCharacterIterator(s); char c = iterator.current(); while (iterator.getIndex() < iterator.getEndIndex()) { switch (c) { case '\\': { //An escaped sequence is following parsed.append(parseEscapedSequence(iterator)); break; } default: { parsed.append(c); break; } } c = iterator.next(); } return parsed.toString(); } /** * Unescapes an escaped character sequence, e.g. \\u00FC. * * @param iterator The string character iterator pointing to the first character after the backslash * @return The unescaped character */ private static char parseEscapedSequence(StringCharacterIterator iterator) { char c = iterator.next(); if (c == 'b') { return '\b'; } else if (c == 'n') { return '\n'; } else if (c == 'r') { return '\r'; } else if (c == 't') { return '\t'; } else if (c == 'U' || c == 'u') { //4 digit hex Unicode value String byte1 = ""; byte1 += iterator.next(); byte1 += iterator.next(); String byte2 = ""; byte2 += iterator.next(); byte2 += iterator.next(); return (char) ((Integer.parseInt(byte1, 16) << 8) + Integer.parseInt(byte2, 16)); } else if ((c >= '0') && (c <= '7')) { //3 digit octal ASCII value String num = ""; num += c; num += iterator.next(); num += iterator.next(); return (char) Integer.parseInt(num, 8); } else { // Possibly something that needn't be escaped, but we should accept it // it anyway for consistency with Apple tools. return c; } } }