/*
* -----------------------------------------------------------------------
* Copyright © 2013-2016 Meno Hochschild, <http://www.menodata.de/>
* -----------------------------------------------------------------------
* This file (MultiFormatParser.java) is part of project Time4J.
*
* Time4J is free software: You can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published
* by the Free Software Foundation, either version 2.1 of the License, or
* (at your option) any later version.
*
* Time4J is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Time4J. If not, see <http://www.gnu.org/licenses/>.
* -----------------------------------------------------------------------
*/
package net.time4j.format.expert;
import net.time4j.engine.AttributeQuery;
import net.time4j.engine.ChronoEntity;
import java.lang.reflect.Array;
import java.text.ParseException;
import java.util.Arrays;
import java.util.List;
/**
* <p>Serves for parsing of text input whose format is not yet known at compile time. </p>
*
* <p>Users who only need to parse different formats for a single locale might consider the
* simpler alternative of concatenating all format pattern strings into one pattern with
* the "|"-symbol as separator. </p>
*
* <p><strong>General notes about usage:</strong> </p>
*
* <p>a) If two patterns or formatters are combined then the order must be from the most complete
* pattern/formatter to the least complete one. Example: Use "MM/dd/yyyy HH:mm|MM/dd/yyyy"
* and not "MM/dd/yyyy|MM/dd/yyyy HH:mm". This is especially important if the formatter in
* question uses default values because the single components will be processed before evaluating any
* default values (which is a late step in parsing). </p>
*
* <p>b) If two patterns/formatters have the same degree of completeness then that component should
* be noted first which is more likely to be expected in input. </p>
*
* @param <T> generic type of chronological entity
* @author Meno Hochschild
* @since 3.14/4.11
* @doctags.concurrency {immutable}
*/
/*[deutsch]
* <p>Dient der Interpretation von Texteingaben, deren Format zur Kompilierzeit noch unbekannt ist. </p>
*
* <p>Anwender, die nur für eine Sprache verschiedene Formate interpretieren müssen, können
* als Alternative auch das Zusammenziehen von Formatmustern in ein einziges Formatmuster in Betracht
* ziehen, indem die einzelnen Formatmuster mit dem "|"-Symbol getrennt werden. </p>
*
* <p><strong>Allgemeine Bestimmungen zum Gebrauch:</strong> </p>
*
* <p>a) Wenn zwei Formatmuster oder Formatierer miteinander kombiniert werden, dann muß die
* Reihenfolge so gewählt werden, daß das Formatmuster bzw. der Formatierer vorangeht, das/der
* einen höheren Grad an Vollständigkeit besitzt. Beispiel: Verwende "MM/dd/yyyy HH:mm|MM/dd/yyyy"
* und nicht "MM/dd/yyyy|MM/dd/yyyy HH:mm". Das ist besonders wichtig, wenn der
* fragliche {@code ChronoFormatter} Standardwerte verwendet, weil die einzelnen Formatelemente
* vor der Auswertung irgendwelcher Standardwerte zuerst ausgewertet werden. </p>
*
* <p>b) Falls zwei Formatmuster oder Formatierer den gleichen Grad an Vollständigkeit haben, dann sollte
* das Formatmuster bzw. der Formatierer vorangehen, das in den zu erwartenden Eingabewerten wahrscheinlicher
* zutrifft. </p>
*
* @param <T> generic type of chronological entity
* @author Meno Hochschild
* @since 3.14/4.11
* @doctags.concurrency {immutable}
*/
public final class MultiFormatParser<T extends ChronoEntity<T>>
    implements ChronoParser<T> {

    //~ Instance variables ------------------------------------------------

    // Formatters are tried in array order by every parse method.
    // The constructor guarantees that no element is null.
    private final ChronoFormatter<T>[] parsers;

    //~ Constructors ------------------------------------------------------

    // Only reachable via the static factories, which always hand over a fresh array copy.
    private MultiFormatParser(ChronoFormatter<T>[] parsers) {
        this.parsers = parsers;

        for (ChronoFormatter<T> parser : this.parsers) {
            if (parser == null) {
                throw new NullPointerException("Null format cannot be set.");
            }
        }
    }

    //~ Methods -----------------------------------------------------------

    /**
     * <p>Creates a new multiple format parser. </p>
     *
     * <p>The given array is copied so later changes to it do not affect the new parser. </p>
     *
     * @param   <T> generic type of chronological entity
     * @param   formats     array of multiple formats
     * @return  new immutable instance of MultiFormatParser
     * @throws  NullPointerException if the array or any of its elements is {@code null}
     * @since   3.14/4.11
     */
    @SafeVarargs
    public static <T extends ChronoEntity<T>> MultiFormatParser<T> of(ChronoFormatter<T>... formats) {
        ChronoFormatter<T>[] parsers = Arrays.copyOf(formats, formats.length);
        return new MultiFormatParser<>(parsers);
    }

    /**
     * <p>Creates a new multiple format parser. </p>
     *
     * <p>The content of the given list is copied into a new array so later changes to the list
     * do not affect the new parser. </p>
     *
     * @param   <T> generic type of chronological entity
     * @param   formats     list of multiple formats
     * @return  new immutable instance of MultiFormatParser
     * @throws  NullPointerException if the list or any of its elements is {@code null}
     * @since   3.14/4.11
     */
    @SuppressWarnings("unchecked")
    public static <T extends ChronoEntity<T>> MultiFormatParser<T> of(List<ChronoFormatter<T>> formats) {
        // Generic array creation is not possible, hence the reflective creation plus unchecked cast.
        ChronoFormatter<T>[] parsers =
            formats.toArray((ChronoFormatter<T>[]) Array.newInstance(ChronoFormatter.class, formats.size()));
        return new MultiFormatParser<>(parsers);
    }

    /**
     * <p>Interprets given text as chronological entity starting at the begin of text. </p>
     *
     * <p>The formats are tried in the order given at construction time. A format only wins if it
     * yields a result without error and either consumes the whole text or is itself configured
     * to tolerate trailing characters. </p>
     *
     * @param   text        text to be parsed
     * @return  parse result
     * @throws  IndexOutOfBoundsException if the text is empty
     * @throws  ParseException if the text is not parseable by any format
     * @see     #parse(CharSequence, ParseLog)
     * @since   3.14/4.11
     */
    public T parse(CharSequence text) throws ParseException {
        ParseLog status = new ParseLog();

        for (ChronoFormatter<T> parser : this.parsers) {
            status.reset(); // initialization
            status.setPosition(0);

            // use the default global attributes of every single parser
            T parsed = parser.parse(text, status);

            if ((parsed != null) && !status.isError()) {
                // unconsumed trailing characters disqualify a format unless it tolerates them
                if (parser.isToleratingTrailingChars() || (status.getPosition() == text.length())) {
                    return parsed;
                }
            }
        }

        throw new ParseException("Not matched by any format: " + text, text.length());
    }

    /**
     * <p>Interprets given text as chronological entity starting
     * at the specified position in parse log. </p>
     *
     * <p>Following example demonstrates best coding practice if used in processing bulk data: </p>
     *
     * <pre>
     *     static final MultiFormatParser&lt;PlainDate&gt; MULTI_FORMAT_PARSER;
     *
     *     static {
     *         ChronoFormatter&lt;PlainDate&gt; germanStyle =
     *              ChronoFormatter.ofDatePattern(&quot;d. MMMM uuuu&quot;, PatternType.CLDR, Locale.GERMAN);
     *         ChronoFormatter&lt;PlainDate&gt; frenchStyle =
     *              ChronoFormatter.ofDatePattern(&quot;d. MMMM uuuu&quot;, PatternType.CLDR, Locale.FRENCH);
     *         ChronoFormatter&lt;PlainDate&gt; usStyle =
     *              ChronoFormatter.ofDatePattern(&quot;MM/dd/uuuu&quot;, PatternType.CLDR, Locale.US);
     *         MULTI_FORMAT_PARSER = MultiFormatParser.of(germanStyle, frenchStyle, usStyle);
     *     }
     *
     *     public Collection&lt;PlainDate&gt; parse(Collection&lt;String&gt; data) {
     *         Collection&lt;PlainDate&gt; parsedDates = new ArrayList&lt;&gt;();
     *         ParseLog plog = new ParseLog();
     *         int index = 0;
     *
     *         for (String text : data) {
     *             PlainDate date = MULTI_FORMAT_PARSER.parse(text, plog);
     *             if ((date == null) || plog.isError()) {
     *                 // users are encouraged to use any good logging framework here
     *                 System.out.println(&quot;Wrong entry found: &quot; + text + &quot; at position &quot; + index);
     *             } else {
     *                 parsedDates.add(date);
     *             }
     *             index++;
     *         }
     *
     *         return Collections.unmodifiableCollection(parsedDates);
     *     }
     * </pre>
     *
     * <p>Note: This method does not itself reject trailing characters. If this behaviour is not
     * useful then please consider the alternative method {@link #parse(CharSequence)}. </p>
     *
     * @param   text        text to be parsed
     * @param   status      parser information; its current position is the start position and
     *                      it is reset before every single format is tried
     * @return  result or {@code null} if parsing does not work
     * @throws  IndexOutOfBoundsException if the start position is at end of text or even behind
     * @since   3.14/4.11
     */
    public T parse(
        CharSequence text,
        ParseLog status
    ) {
        // remember the start because every attempt resets the log
        int start = status.getPosition();

        for (ChronoFormatter<T> parser : this.parsers) {
            status.reset(); // initialization
            status.setPosition(start);

            // use the default global attributes of every single parser
            T parsed = parser.parse(text, status);

            if ((parsed != null) && !status.isError()) {
                return parsed;
            }
        }

        markUnmatched(text, status, start);
        return null;
    }

    /**
     * <p>Interprets given text as chronological entity starting at the specified position
     * in parse log, handing the given attributes over to every single format. </p>
     *
     * <p>Like {@link #parse(CharSequence, ParseLog)} this method does not itself reject
     * trailing characters. </p>
     *
     * @param   text        text to be parsed
     * @param   status      parser information; its current position is the start position and
     *                      it is reset before every single format is tried
     * @param   attributes  attributes passed through to each underlying formatter
     * @return  result or {@code null} if parsing does not work
     */
    @Override
    public T parse(
        CharSequence text,
        ParseLog status,
        AttributeQuery attributes
    ) {
        // remember the start because every attempt resets the log
        int start = status.getPosition();

        for (ChronoFormatter<T> parser : this.parsers) {
            status.reset(); // initialization
            status.setPosition(start);

            // use the default global attributes of every single parser,
            // possibly overridden by user-defined attributes
            T parsed = parser.parse(text, status, attributes);

            if ((parsed != null) && !status.isError()) {
                return parsed;
            }
        }

        markUnmatched(text, status, start);
        return null;
    }

    // Flags the log as failed after no format has matched. The error index left in the log
    // is kept if it is defined. Otherwise (for example if there was no format to try at all)
    // the start position is used so that a negative index is never handed to the log.
    // NOTE(review): assumes ParseLog reports an undefined error index as a negative value - confirm.
    private static void markUnmatched(
        CharSequence text,
        ParseLog status,
        int start
    ) {
        int errorIndex = status.getErrorIndex();

        if (errorIndex < 0) {
            errorIndex = start;
        }

        status.setError(errorIndex, "Not matched by any format: " + text);
    }

}