// MultiFormatParser.java example
//
// Explorer
// Time4J-master
/*
 * -----------------------------------------------------------------------
 * Copyright © 2013-2016 Meno Hochschild, <http://www.menodata.de/>
 * -----------------------------------------------------------------------
 * This file (MultiFormatParser.java) is part of project Time4J.
 *
 * Time4J is free software: You can redistribute it and/or modify it
 * under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 2.1 of the License, or
 * (at your option) any later version.
 *
 * Time4J is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with Time4J. If not, see <http://www.gnu.org/licenses/>.
 * -----------------------------------------------------------------------
 */

package net.time4j.format.expert;

import net.time4j.engine.AttributeQuery;
import net.time4j.engine.ChronoEntity;

import java.lang.reflect.Array;
import java.text.ParseException;
import java.util.Arrays;
import java.util.List;


/**
 * <p>Serves for parsing of text input whose format is not yet known at compile time. </p>
 *
 * <p>Users who need to parse different formats for a single locale only might consider the
 * simple alternative of concatenating all format pattern strings into one pattern with
 * the "|"-symbol as separator. </p>
 *
 * <p><strong>General notes about usage:</strong> </p>
 *
 * <p>a) If two patterns or formatters are combined then the order must be from the most complete
 * pattern/formatter to the least complete one. Example: Use "MM/dd/yyyy HH:mm|MM/dd/yyyy"
 * and not "MM/dd/yyyy|MM/dd/yyyy HH:mm". This is especially important if the formatter in
 * question uses default values because the single components will be processed before evaluating any
 * default values (which is a late step in parsing). </p>
 *
 * <p>b) If two patterns/formatters have the same degree of completeness then that component should
 * be noted first which is more likely to be expected in input. </p>
 *
 * @param   <T> generic type of chronological entity
 * @author  Meno Hochschild
 * @since   3.14/4.11
 * @doctags.concurrency {immutable}
 */
/*[deutsch]
 * <p>Dient der Interpretation von Texteingaben, deren Format zur Kompilierzeit noch unbekannt ist. </p>
 *
 * <p>Anwender, die nur für eine Sprache verschiedene Formate interpretieren müssen, können
 * als Alternative auch das Zusammenziehen von Formatmustern in ein einziges Formatmuster in Betracht
 * ziehen, indem die einzelnen Formatmuster mit dem "|"-Symbol getrennt werden. </p>
 *
 * <p><strong>Allgemeine Bestimmungen zum Gebrauch:</strong> </p>
 *
 * <p>a) Wenn zwei Formatmuster oder Formatierer miteinander kombiniert werden, dann muß die
 * Reihenfolge so gewählt werden, daß das Formatmuster bzw. der Formatierer vorangeht, das/der
 * einen höheren Grad an Vollständigkeit besitzt. Beispiel: Verwende "MM/dd/yyyy HH:mm|MM/dd/yyyy"
 * und nicht "MM/dd/yyyy|MM/dd/yyyy HH:mm". Das ist besonders wichtig, wenn der
 * fragliche {@code ChronoFormatter} Standardwerte verwendet, weil die einzelnen Formatelemente
 * vor der Auswertung irgendwelcher Standardwerte zuerst ausgewertet werden. </p>
 *
 * <p>b) Falls zwei Formatmuster oder Formatierer den gleichen Grad an Vollständigkeit haben, dann sollte
 * das Formatmuster bzw. der Formatierer vorangehen, das in den zu erwartenden Eingabewerten wahrscheinlicher
 * zutrifft. </p>
 *
 * @param   <T> generic type of chronological entity
 * @author  Meno Hochschild
 * @since   3.14/4.11
 * @doctags.concurrency {immutable}
 */
public final class MultiFormatParser<T extends ChronoEntity<T>>
    implements ChronoParser<T> {

    //~ Instance variables ------------------------------------------------

    // formatters in the order in which they are tried; the array is never modified after construction
    private final ChronoFormatter<T>[] parsers;

    //~ Constructors ------------------------------------------------------

    /**
     * <p>Wraps the given array without copying it, so the factory methods
     * must hand over an array which is not shared with the caller. </p>
     *
     * @param   parsers     formatters to be tried in array order
     * @throws  NullPointerException if any array element is {@code null}
     */
    private MultiFormatParser(ChronoFormatter<T>[] parsers) {
        super();

        // validate first so that the field is only ever assigned a fully checked array
        for (ChronoFormatter<T> parser : parsers) {
            if (parser == null) {
                throw new NullPointerException("Null format cannot be set.");
            }
        }

        this.parsers = parsers;

    }

    //~ Methods -----------------------------------------------------------

    /**
     * <p>Creates a new multiple format parser. </p>
     *
     * <p>The formats are tried in the given order when parsing. </p>
     *
     * @param   <T> generic type of chronological entity
     * @param   formats     array of multiple formats
     * @return  new immutable instance of MultiFormatParser
     * @throws  NullPointerException if the array or any of its elements is {@code null}
     * @since   3.14/4.11
     */
    @SafeVarargs
    public static <T extends ChronoEntity<T>> MultiFormatParser<T> of(ChronoFormatter<T>... formats) {

        // defensive copy: later changes of the caller's array must not affect this instance
        ChronoFormatter<T>[] parsers = Arrays.copyOf(formats, formats.length);
        return new MultiFormatParser<>(parsers);

    }

    /**
     * <p>Creates a new multiple format parser. </p>
     *
     * <p>The formats are tried in list order when parsing. </p>
     *
     * @param   <T> generic type of chronological entity
     * @param   formats     list of multiple formats
     * @return  new immutable instance of MultiFormatParser
     * @throws  NullPointerException if the list or any of its elements is {@code null}
     * @since   3.14/4.11
     */
    public static <T extends ChronoEntity<T>> MultiFormatParser<T> of(List<ChronoFormatter<T>> formats) {

        // A generic array cannot be created directly. The cast is safe because the
        // new array is private to this method and only filled from the typed list.
        @SuppressWarnings("unchecked")
        ChronoFormatter<T>[] target =
            (ChronoFormatter<T>[]) Array.newInstance(ChronoFormatter.class, formats.size());
        return new MultiFormatParser<>(formats.toArray(target));

    }

    /**
     * <p>Interprets given text as chronological entity starting at the begin of text. </p>
     *
     * <p>Every format is tried in order with a freshly reset parse log. A format only
     * matches if it either consumes the whole text or tolerates trailing characters. </p>
     *
     * @param   text        text to be parsed
     * @return  parse result
     * @throws  IndexOutOfBoundsException if the text is empty
     * @throws  ParseException if the text is not parseable
     * @see     #parse(CharSequence, ParseLog)
     * @since   3.14/4.11
     */
    public T parse(CharSequence text)
        throws ParseException {

        ParseLog status = new ParseLog();

        for (ChronoFormatter<T> parser : this.parsers) {
            status.reset(); // initialization
            status.setPosition(0);

            // use the default global attributes of every single parser
            T parsed = parser.parse(text, status);

            if ((parsed == null) || status.isError()) {
                continue; // this format failed, try the next one
            }

            // unparsed rest of text is only acceptable if the format explicitly tolerates it
            if (parser.isToleratingTrailingChars() || (status.getPosition() == text.length())) {
                return parsed;
            }

        }

        throw new ParseException("Not matched by any format: " + text, text.length());

    }

    /**
     * <p>Interprets given text as chronological entity starting
     * at the specified position in parse log. </p>
     *
     * <p>Following example demonstrates best coding practice if used in processing bulk data: </p>
     *
     * <pre>
     *     static final MultiFormatParser&lt;PlainDate&gt; MULTI_FORMAT_PARSER;
     *
     *     static {
     *         ChronoFormatter&lt;PlainDate&gt; germanStyle =
     *              ChronoFormatter.ofDatePattern("d. MMMM uuuu", PatternType.CLDR, Locale.GERMAN);
     *         ChronoFormatter&lt;PlainDate&gt; frenchStyle =
     *              ChronoFormatter.ofDatePattern("d. MMMM uuuu", PatternType.CLDR, Locale.FRENCH);
     *         ChronoFormatter&lt;PlainDate&gt; usStyle =
     *              ChronoFormatter.ofDatePattern("MM/dd/uuuu", PatternType.CLDR, Locale.US);
     *         MULTI_FORMAT_PARSER = MultiFormatParser.of(germanStyle, frenchStyle, usStyle);
     *     }
     *
     *     public Collection&lt;PlainDate&gt; parse(Collection&lt;String&gt; data) {
     *         Collection&lt;PlainDate&gt; parsedDates = new ArrayList&lt;&gt;();
     *         int index = 0;
     *
     *         for (String text : data) {
     *             // fresh log per entry because parsing starts at the position stored in the log
     *             ParseLog plog = new ParseLog();
     *             PlainDate date = MULTI_FORMAT_PARSER.parse(text, plog);
     *             if ((date == null) || plog.isError()) {
     *                 // users are encouraged to use any good logging framework here
     *                 System.out.println("Wrong entry found: " + text + " at position " + index);
     *             } else {
     *                 parsedDates.add(date);
     *             }
     *             index++;
     *         }
     *
     *         return Collections.unmodifiableCollection(parsedDates);
     *     }
     * </pre>
     *
     * <p>Note: This method tolerates trailing characters. If this behaviour is not useful
     * then please consider the alternative method {@link #parse(CharSequence)}. </p>
     *
     * <p>If no format matches then the log is left in error state and {@code null} is returned.
     * The error index is the one of the format tried last, or the start position if no
     * error index is available. </p>
     *
     * @param   text        text to be parsed
     * @param   status      parser information (always as new instance), its current position
     *                      is the start position and it is reset before every format is tried
     * @return  result or {@code null} if parsing does not work
     * @throws  IndexOutOfBoundsException if the start position is at end of text or even behind
     * @since   3.14/4.11
     */
    public T parse(
        CharSequence text,
        ParseLog status
    ) {

        // remember the start position because the log is reset for every format
        int start = status.getPosition();

        for (ChronoFormatter<T> parser : this.parsers) {
            status.reset(); // initialization
            status.setPosition(start);

            // use the default global attributes of every single parser
            T parsed = parser.parse(text, status);

            if ((parsed != null) && !status.isError()) {
                return parsed;
            }

        }

        markUnmatched(text, status, start);
        return null;

    }

    /**
     * <p>Interprets given text as chronological entity starting at the specified
     * position in parse log and passes the given attributes to every format. </p>
     *
     * <p>Works like {@link #parse(CharSequence, ParseLog)}: The formats are tried in order,
     * trailing characters are not checked here, and if no format matches then the log
     * is left in error state and {@code null} is returned. </p>
     *
     * @param   text        text to be parsed
     * @param   status      parser information, its current position is the start position
     *                      and it is reset before every format is tried
     * @param   attributes  attributes handed over to every single format
     * @return  result or {@code null} if parsing does not work
     */
    @Override
    public T parse(
        CharSequence text,
        ParseLog status,
        AttributeQuery attributes
    ) {

        // remember the start position because the log is reset for every format
        int start = status.getPosition();

        for (ChronoFormatter<T> parser : this.parsers) {
            status.reset(); // initialization
            status.setPosition(start);

            // use the default global attributes of every single parser,
            // possibly overridden by user-defined attributes
            T parsed = parser.parse(text, status, attributes);

            if ((parsed != null) && !status.isError()) {
                return parsed;
            }

        }

        markUnmatched(text, status, start);
        return null;

    }

    // Puts the log into error state after all formats have failed.
    private static void markUnmatched(
        CharSequence text,
        ParseLog status,
        int start
    ) {

        int errorIndex = status.getErrorIndex();

        // The error index can be negative if no format has reported an error position,
        // for example if no formats are configured at all. NOTE(review): ParseLog is
        // expected to reject a negative error index, hence the fallback to the start position.
        if (errorIndex < 0) {
            errorIndex = start;
        }

        status.setError(errorIndex, "Not matched by any format: " + text);

    }

}