/* * Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 only, as * published by the Free Software Foundation. Oracle designates this * particular file as subject to the "Classpath" exception as provided * by Oracle in the LICENSE file that accompanied this code. * * This code is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * version 2 for more details (a copy is included in the LICENSE file that * accompanied this code). * * You should have received a copy of the GNU General Public License version * 2 along with this work; if not, write to the Free Software Foundation, * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. * * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA * or visit www.oracle.com if you need additional information or have any * questions. */ package jdk.nashorn.internal.parser; import static java.lang.Character.DECIMAL_DIGIT_NUMBER; import static java.lang.Character.LOWERCASE_LETTER; import static java.lang.Character.OTHER_PUNCTUATION; import static java.lang.Character.SPACE_SEPARATOR; import static java.lang.Character.UPPERCASE_LETTER; import java.util.HashMap; import java.util.Locale; /** * JavaScript date parser. This class first tries to parse a date string * according to the extended ISO 8601 format specified in ES5 15.9.1.15. * If that fails, it falls back to legacy mode in which it accepts a range * of different formats. * * <p>This class is neither thread-safe nor reusable. Calling the * <tt>parse()</tt> method more than once will yield undefined results.</p> */ public class DateParser { /** Constant for index position of parsed year value. */ public final static int YEAR = 0; /** Constant for index position of parsed month value. */ public final static int MONTH = 1; /** Constant for index position of parsed day value. */ public final static int DAY = 2; /** Constant for index position of parsed hour value. */ public final static int HOUR = 3; /** Constant for index position of parsed minute value. */ public final static int MINUTE = 4; /** Constant for index position of parsed second value. */ public final static int SECOND = 5; /** Constant for index position of parsed millisecond value. */ public final static int MILLISECOND = 6; /** Constant for index position of parsed time zone offset value. */ public final static int TIMEZONE = 7; private enum Token { UNKNOWN, NUMBER, SEPARATOR, PARENTHESIS, NAME, SIGN, END } private final String string; private final int length; private final Integer[] fields; private int pos = 0; private Token token; private int tokenLength; private Name nameValue; private int numValue; private int currentField = YEAR; private int yearSign = 0; private boolean namedMonth = false; private final static HashMap<String,Name> names = new HashMap<>(); static { addName("monday", Name.DAY_OF_WEEK, 0); addName("tuesday", Name.DAY_OF_WEEK, 0); addName("wednesday", Name.DAY_OF_WEEK, 0); addName("thursday", Name.DAY_OF_WEEK, 0); addName("friday", Name.DAY_OF_WEEK, 0); addName("saturday", Name.DAY_OF_WEEK, 0); addName("sunday", Name.DAY_OF_WEEK, 0); addName("january", Name.MONTH_NAME, 1); addName("february", Name.MONTH_NAME, 2); addName("march", Name.MONTH_NAME, 3); addName("april", Name.MONTH_NAME, 4); addName("may", Name.MONTH_NAME, 5); addName("june", Name.MONTH_NAME, 6); addName("july", Name.MONTH_NAME, 7); addName("august", Name.MONTH_NAME, 8); addName("september", Name.MONTH_NAME, 9); addName("october", Name.MONTH_NAME, 10); addName("november", Name.MONTH_NAME, 11); addName("december", Name.MONTH_NAME, 12); addName("am", Name.AM_PM, 0); addName("pm", Name.AM_PM, 12); addName("z", Name.TIMEZONE_ID, 0); addName("gmt", Name.TIMEZONE_ID, 0); addName("ut", Name.TIMEZONE_ID, 0); addName("utc", Name.TIMEZONE_ID, 0); addName("est", Name.TIMEZONE_ID, -5 * 60); addName("edt", Name.TIMEZONE_ID, -4 * 60); addName("cst", Name.TIMEZONE_ID, -6 * 60); addName("cdt", Name.TIMEZONE_ID, -5 * 60); addName("mst", Name.TIMEZONE_ID, -7 * 60); addName("mdt", Name.TIMEZONE_ID, -6 * 60); addName("pst", Name.TIMEZONE_ID, -8 * 60); addName("pdt", Name.TIMEZONE_ID, -7 * 60); addName("t", Name.TIME_SEPARATOR, 0); } /** * Construct a new <tt>DateParser</tt> instance for parsing the given string. * @param string the string to be parsed */ public DateParser(final String string) { this.string = string; this.length = string.length(); this.fields = new Integer[TIMEZONE + 1]; } /** * Try parsing the given string as date according to the extended ISO 8601 format * specified in ES5 15.9.1.15. Fall back to legacy mode if that fails. * This method returns <tt>true</tt> if the string could be parsed. * @return true if the string could be parsed as date */ public boolean parse() { return parseEcmaDate() || parseLegacyDate(); } /** * Try parsing the date string according to the rules laid out in ES5 15.9.1.15. * The date string must conform to the following format: * * <pre> [('-'|'+')yy]yyyy[-MM[-dd]][Thh:mm[:ss[.sss]][Z|(+|-)hh:mm]] </pre> * * <p>If the string does not contain a time zone offset, the <tt>TIMEZONE</tt> field * is set to <tt>0</tt> (GMT).</p> * @return true if string represents a valid ES5 date string. */ public boolean parseEcmaDate() { if (token == null) { token = next(); } while (token != Token.END) { switch (token) { case NUMBER: if (currentField == YEAR && yearSign != 0) { // 15.9.1.15.1 Extended year must have six digits if (tokenLength != 6) { return false; } numValue *= yearSign; } else if (!checkEcmaField(currentField, numValue)) { return false; } if (!skipEcmaDelimiter()) { return false; } if (currentField < TIMEZONE) { set(currentField++, numValue); } break; case NAME: if (nameValue == null) { return false; } switch (nameValue.type) { case Name.TIME_SEPARATOR: if (currentField == YEAR || currentField > HOUR) { return false; } currentField = HOUR; break; case Name.TIMEZONE_ID: if (!nameValue.key.equals("z") || !setTimezone(nameValue.value, false)) { return false; } break; default: return false; } break; case SIGN: if (peek() == -1) { // END after sign - wrong! return false; } if (currentField == YEAR) { yearSign = numValue; } else if (currentField < SECOND || !setTimezone(readTimeZoneOffset(), true)) { // Note: Spidermonkey won't parse timezone unless time includes seconds and milliseconds return false; } break; default: return false; } token = next(); } return patchResult(true); } /** * Try parsing the date using a fuzzy algorithm that can handle a variety of formats. * * <p>Numbers separated by <tt>':'</tt> are treated as time values, optionally followed by a * millisecond value separated by <tt>'.'</tt>. Other number values are treated as date values. * The exact sequence of day, month, and year values to apply is determined heuristically.</p> * * <p>English month names and selected time zone names as well as AM/PM markers are recognized * and handled properly. Additionally, numeric time zone offsets such as <tt>(+|-)hh:mm</tt> or * <tt>(+|-)hhmm</tt> are recognized. If the string does not contain a time zone offset * the <tt>TIMEZONE</tt>field is left undefined, meaning the local time zone should be applied.</p> * * <p>English weekday names are recognized but ignored. All text in parentheses is ignored as well. * All other text causes parsing to fail.</p> * * @return true if the string could be parsed */ public boolean parseLegacyDate() { if (yearSign != 0 || currentField > DAY) { // we don't support signed years in legacy mode return false; } if (token == null) { token = next(); } while (token != Token.END) { switch (token) { case NUMBER: if (skipDelimiter(':')) { // A number followed by ':' is parsed as time if (!setTimeField(numValue)) { return false; } // consume remaining time tokens do { token = next(); if (token != Token.NUMBER || !setTimeField(numValue)) { return false; } } while (skipDelimiter(isSet(SECOND) ? '.' : ':')); } else { // Parse as date token if (!setDateField(numValue)) { return false; } skipDelimiter('-'); } break; case NAME: if (nameValue == null) { return false; } switch (nameValue.type) { case Name.AM_PM: if (!setAmPm(nameValue.value)) { return false; } break; case Name.MONTH_NAME: if (!setMonth(nameValue.value)) { return false; } break; case Name.TIMEZONE_ID: if (!setTimezone(nameValue.value, false)) { return false; } break; case Name.TIME_SEPARATOR: return false; default: break; } if (nameValue.type != Name.TIMEZONE_ID) { skipDelimiter('-'); } break; case SIGN: if (peek() == -1) { // END after sign - wrong! return false; } if (!setTimezone(readTimeZoneOffset(), true)) { return false; } break; case PARENTHESIS: if (!skipParentheses()) { return false; } break; case SEPARATOR: break; default: return false; } token = next(); } return patchResult(false); } /** * Get the parsed date and time fields as an array of <tt>Integers</tt>. * * <p>If parsing was successful, all fields are guaranteed to be set except for the * <tt>TIMEZONE</tt> field which may be <tt>null</tt>, meaning that local time zone * offset should be applied.</p> * * @return the parsed date fields */ public Integer[] getDateFields() { return fields; } private boolean isSet(final int field) { return fields[field] != null; } private Integer get(final int field) { return fields[field]; } private void set(final int field, final int value) { fields[field] = value; } private int peek() { return pos < length ? string.charAt(pos) : -1; } // Skip delimiter if followed by a number. Used for ISO 8601 formatted dates private boolean skipNumberDelimiter(final char c) { if (pos < length - 1 && string.charAt(pos) == c && Character.getType(string.charAt(pos + 1)) == DECIMAL_DIGIT_NUMBER) { token = null; pos++; return true; } return false; } private boolean skipDelimiter(final char c) { if (pos < length && string.charAt(pos) == c) { token = null; pos++; return true; } return false; } private Token next() { if (pos >= length) { tokenLength = 0; return Token.END; } final char c = string.charAt(pos); if (c > 0x80) { tokenLength = 1; pos++; return Token.UNKNOWN; // We only deal with ASCII here } final int type = Character.getType(c); switch (type) { case DECIMAL_DIGIT_NUMBER: numValue = readNumber(6); return Token.NUMBER; case SPACE_SEPARATOR : case OTHER_PUNCTUATION: tokenLength = 1; pos++; return Token.SEPARATOR; case UPPERCASE_LETTER: case LOWERCASE_LETTER: nameValue = readName(); return Token.NAME; default: tokenLength = 1; pos++; switch (c) { case '(': return Token.PARENTHESIS; case '-': case '+': numValue = c == '-' ? -1 : 1; return Token.SIGN; default: return Token.UNKNOWN; } } } private static boolean checkLegacyField(final int field, final int value) { switch (field) { case HOUR: return isHour(value); case MINUTE: case SECOND: return isMinuteOrSecond(value); case MILLISECOND: return isMillisecond(value); default: // skip validation on other legacy fields as we don't know what's what return true; } } private boolean checkEcmaField(final int field, final int value) { switch (field) { case YEAR: return tokenLength == 4; case MONTH: return tokenLength == 2 && isMonth(value); case DAY: return tokenLength == 2 && isDay(value); case HOUR: return tokenLength == 2 && isHour(value); case MINUTE: case SECOND: return tokenLength == 2 && isMinuteOrSecond(value); case MILLISECOND: // we allow millisecond to be less than 3 digits return tokenLength < 4 && isMillisecond(value); default: return true; } } private boolean skipEcmaDelimiter() { switch (currentField) { case YEAR: case MONTH: return skipNumberDelimiter('-') || peek() == 'T' || peek() == -1; case DAY: return peek() == 'T' || peek() == -1; case HOUR: case MINUTE: return skipNumberDelimiter(':') || endOfTime(); case SECOND: return skipNumberDelimiter('.') || endOfTime(); default: return true; } } private boolean endOfTime() { final int c = peek(); return c == -1 || c == 'Z' || c == '-' || c == '+' || c == ' '; } private static boolean isAsciiLetter(final char ch) { return ('A' <= ch && ch <= 'Z') || ('a' <= ch && ch <= 'z'); } private static boolean isAsciiDigit(final char ch) { return '0' <= ch && ch <= '9'; } private int readNumber(final int maxDigits) { final int start = pos; int n = 0; final int max = Math.min(length, pos + maxDigits); while (pos < max && isAsciiDigit(string.charAt(pos))) { n = n * 10 + string.charAt(pos++) - '0'; } tokenLength = pos - start; return n; } private Name readName() { final int start = pos; final int limit = Math.min(pos + 3, length); // first read up to the key length while (pos < limit && isAsciiLetter(string.charAt(pos))) { pos++; } final String key = string.substring(start, pos).toLowerCase(Locale.ENGLISH); final Name name = names.get(key); // then advance to end of name while (pos < length && isAsciiLetter(string.charAt(pos))) { pos++; } tokenLength = pos - start; // make sure we have the full name or a prefix if (name != null && name.matches(string, start, tokenLength)) { return name; } return null; } private int readTimeZoneOffset() { final int sign = string.charAt(pos - 1) == '+' ? 1 : -1; int offset = readNumber(2); skipDelimiter(':'); offset = offset * 60 + readNumber(2); return sign * offset; } private boolean skipParentheses() { int parenCount = 1; while (pos < length && parenCount != 0) { final char c = string.charAt(pos++); if (c == '(') { parenCount++; } else if (c == ')') { parenCount--; } } return true; } private static int getDefaultValue(final int field) { switch (field) { case MONTH: case DAY: return 1; default: return 0; } } private static boolean isDay(final int n) { return 1 <= n && n <= 31; } private static boolean isMonth(final int n) { return 1 <= n && n <= 12; } private static boolean isHour(final int n) { return 0 <= n && n <= 24; } private static boolean isMinuteOrSecond(final int n) { return 0 <= n && n < 60; } private static boolean isMillisecond(final int n) { return 0<= n && n < 1000; } private boolean setMonth(final int m) { if (!isSet(MONTH)) { namedMonth = true; set(MONTH, m); return true; } return false; } private boolean setDateField(final int n) { for (int field = YEAR; field != HOUR; field++) { if (!isSet(field)) { // no validation on legacy date fields set(field, n); return true; } } return false; } private boolean setTimeField(final int n) { for (int field = HOUR; field != TIMEZONE; field++) { if (!isSet(field)) { if (checkLegacyField(field, n)) { set(field, n); return true; } return false; } } return false; } private boolean setTimezone(final int offset, final boolean asNumericOffset) { if (!isSet(TIMEZONE) || (asNumericOffset && get(TIMEZONE) == 0)) { set(TIMEZONE, offset); return true; } return false; } private boolean setAmPm(final int offset) { if (!isSet(HOUR)) { return false; } final int hour = get(HOUR); if (hour >= 0 && hour <= 12) { set(HOUR, hour + offset); } return true; } private boolean patchResult(final boolean strict) { // sanity checks - make sure we have something if (!isSet(YEAR) && !isSet(HOUR)) { return false; } if (isSet(HOUR) && !isSet(MINUTE)) { return false; } // fill in default values for unset fields except timezone for (int field = YEAR; field <= TIMEZONE; field++) { if (get(field) == null) { if (field == TIMEZONE && !strict) { // We only use UTC as default timezone for dates parsed complying with // the format specified in ES5 15.9.1.15. Otherwise the slot is left empty // and local timezone is used. continue; } final int value = getDefaultValue(field); set(field, value); } } if (!strict) { // swap year, month, and day if it looks like the right thing to do if (isDay(get(YEAR))) { final int d = get(YEAR); set(YEAR, get(DAY)); if (namedMonth) { // d-m-y set(DAY, d); } else { // m-d-y final int d2 = get(MONTH); set(MONTH, d); set(DAY, d2); } } // sanity checks now that we know what's what if (!isMonth(get(MONTH)) || !isDay(get(DAY))) { return false; } // add 1900 or 2000 to year if it's between 0 and 100 final int year = get(YEAR); if (year >= 0 && year < 100) { set(YEAR, year >= 50 ? 1900 + year : 2000 + year); } } else { // 24 hour value is only allowed if all other time values are zero if (get(HOUR) == 24 && (get(MINUTE) != 0 || get(SECOND) != 0 || get(MILLISECOND) != 0)) { return false; } } // set month to 0-based set(MONTH, get(MONTH) - 1); return true; } private static void addName(final String str, final int type, final int value) { final Name name = new Name(str, type, value); names.put(name.key, name); } private static class Name { final String name; final String key; final int value; final int type; final static int DAY_OF_WEEK = -1; final static int MONTH_NAME = 0; final static int AM_PM = 1; final static int TIMEZONE_ID = 2; final static int TIME_SEPARATOR = 3; Name(final String name, final int type, final int value) { assert name != null; assert name.equals(name.toLowerCase(Locale.ENGLISH)); this.name = name; // use first three characters as lookup key this.key = name.substring(0, Math.min(3, name.length())); this.type = type; this.value = value; } public boolean matches(final String str, final int offset, final int len) { return name.regionMatches(true, 0, str, offset, len); } @Override public String toString() { return name; } } }