Reader.java example

Explorer
geotoolkit-master
/*
 *    Geotoolkit.org - An Open Source Java GIS Toolkit
 *    http://www.geotoolkit.org
 *
 *    (C) 2014, Geomatys
 *
 *    This library is free software; you can redistribute it and/or
 *    modify it under the terms of the GNU Lesser General Public
 *    License as published by the Free Software Foundation;
 *    version 2.1 of the License.
 *
 *    This library is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *    Lesser General Public License for more details.
 */
package org.geotoolkit.io.yaml;

import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.util.HashMap;
import java.text.ParseException;
import org.apache.sis.util.Numbers;
import org.apache.sis.util.Characters;
import org.apache.sis.util.CharSequences;


/**
 * Reads objects from the JSON format.
 *
 * @author Martin Desruisseaux (Geomatys)
 * @module
 */
final class Reader {
    /**
     * The factory to use for creating objects.
     */
    private final MetadataFactory factory;

    /**
     * The text to parse.
     */
    private final CharSequence text;

    /**
     * The current parse position.
     */
    private int position;

    /**
     * Index after the last character to parse.
     */
    private final int length;

    /**
     * Creates a new reader.
     *
     * @param text The text to parse.
     */
    Reader(final CharSequence text) {
        this.factory = MetadataFactory.DEFAULT; // Fixed for now, may be configurable in a future version.
        this.text    = text;
        this.length  = text.length();
    }

    /**
     * Advances the {@linkplain #position} to the next non-ignorable character.
     * This method skips all white-spaces.
     */
    private void skipIgnorables() {
        position = CharSequences.skipLeadingWhitespaces(text, position, length);
    }

    /**
     * Skips the list separator (a coma) if present. Other characters (in
     * particular the {@code endOfSequence} character) are not skipped.
     *
     * @param  endOfSequence The character ending the sequence.
     * @return {@code true} if there is more elements in the list.
     * @throws ParseException If an illegal character is found.
     */
    private boolean skipListSeparator(final char endOfSequence) throws ParseException {
        final char c = nextTokenStart();
        if (c == ',') {
            position++; // Skip the ',' character.
            return true;
        } else if (c == endOfSequence) {
            return false;
        } else {
            throw new ParseException("Illegal character: " + c, position);
        }
    }

    /**
     * Returns the first non-whitespace character, or thrown an exception if none.
     */
    private char nextTokenStart() throws ParseException {
        skipIgnorables();
        if (position < length) {
            return text.charAt(position);
        }
        throw new ParseException("Unexpected end of text.", length);
    }

    /**
     * Returns the next token with un-escaped characters. After this method call, the
     * {@linkplain #position} has been set to the first character after the returned token.
     *
     * @param  isQuoted {@code true} if the character at the current {@linkplain #position} is {@code '"'}.
     * @return The next token.
     */
    private CharSequence nextToken(final boolean isQuoted) throws ParseException {
        int i = position;
        if (isQuoted) {
            position++; // Skip the '"' character.
            do if ((i = CharSequences.indexOf(text, '"', i+1, length)) < 0) {
                throw new ParseException("Unexpected end of text.", length);
            } while (text.charAt(i-1) == '\\');
        } else {
            while (++i < length) {
                final char c = text.charAt(i);
                if (c == ',' || c == ':' || c == '\r' || c == '\n') {
                    break;
                }
            }
        }
        CharSequence token = unescape(text, position, i);
        if (isQuoted) {
            i++;
        } else if ("null".contentEquals(token)) {
            token = null;
        }
        position = i;
        return token;
    }

    /**
     * Parses the value. This method tries to return the value as an object of the given type,
     * but the type may sometime be different (for example a {@link String} or a {@link List}).
     *
     * @param  type The value type.
     * @return The parsed value, often (but not always) as an instance of {@code type}.
     * @throws ParseException If an error occurred while parsing.
     */
    final Object parse(final Class<?> type) throws ParseException {
        skipIgnorables();
        if (position >= length) {
            return null;
        }
        final char c = text.charAt(position);
        if (c == '[') {
            /*
             * We have an array of values. Parse all values and return them in a list.
             */
            position++; // Skip the '[' character.
            final List<Object> values = new ArrayList<>();
            boolean hasNext = (nextTokenStart() != ']');
            while (hasNext) {
                values.add(parse(type));
                hasNext = skipListSeparator(']');
            }
            position++; // Skip the ']' character.
            return values;
        }
        if (c != '{') {
            /*
             * We have a string, number or boolean. Find the end of value, either by looking
             * for the closing " or by looking for the separator (comma or line separator).
             */
            Object token = nextToken(c == '"');
            if (token != null) {
                if (Number.class.isAssignableFrom(type) || type == Boolean.class) {
                    token = Numbers.valueOf(token.toString(), type);
                }
            }
            return token;
        }
        /*
         * We have an object. Parse everything until the closing bracket.
         */
        final int objectStart = position++; // Skip the '{' character.
        final Map<String,Class<?>> definition = factory.getDefinition(type);
        final Map<String,Object> properties = new HashMap<>();
        boolean hasNext = (nextTokenStart() != '}');
        while (hasNext) {
            final int entryStart = position;
            final CharSequence token = nextToken(nextTokenStart() == '"');
            if (nextTokenStart() != ':') {
                throw new ParseException("Expected \"key: value\" pair.", position);
            }
            position++; // Skip the ':' character.
            final String key = String.valueOf(token);
            Class<?> childType = definition.get(key);
            if (childType == null) {
                childType = String.class; // TODO
            }
            if (properties.put(key, parse(childType)) != null) {
                throw new ParseException("Duplicated key: \"" + key + "\".", entryStart);
            }
            hasNext = skipListSeparator('}');
        }
        position++; // Skip the '}' character.
        return factory.create(type, definition, properties, objectStart);
    }

    /**
     * Replaces the escape sequences in a subregion of the given character string.
     * This method replaces the escape sequences defined by the Java language and JSON.
     * For any unknown escape sequence, this method just removes the backslash.
     */
    static CharSequence unescape(final CharSequence text, int start, final int stop) throws ParseException {
        int i = CharSequences.indexOf(text, '\\', start, stop);
        if (i < 0) {
            return text.subSequence(start, stop);
        }
        final StringBuilder buffer = new StringBuilder(stop - start - 1);
        do {
            buffer.append(text, start, i);
            if (++i >= stop) {
                throw new ParseException("Unexpected end of text.", stop);
            }
            final char c = text.charAt(i);
            final char r;
            start = ++i;
            switch (c) {
                default:  r =  c;   break;
                case 'b': r = '\b'; break;
                case 'f': r = '\f'; break;
                case 'n': r = '\n'; break;
                case 'r': r = '\r'; break;
                case 't': r = '\t'; break;
                case 'u': {
                    final int s = Math.min(start + Writer.UNICODE_LENGTH, stop);
                    while (i < s && Characters.isHexadecimal(text.charAt(i))) i++;
                    final String u = text.subSequence(start, i).toString();
                    try {
                        r = (char) Integer.parseInt(u, 16);
                    } catch (NumberFormatException e) {
                        throw new ParseException("Invalid character code: " + u, start);
                    }
                    start = i;
                    break;
                }
            }
            buffer.append(r);
        } while ((i = CharSequences.indexOf(text, '\\', start, stop)) >= 0);
        return buffer.append(text, start, stop).toString();
    }
}