// Copyright 2013 Michel Kraemer
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package de.undercouch.citeproc.bibtex;
import java.text.DateFormatSymbols;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import org.apache.commons.lang3.StringUtils;
import de.undercouch.citeproc.csl.CSLDate;
import de.undercouch.citeproc.csl.CSLDateBuilder;
/**
* Parses dates
* @author Michel Kraemer
*/
public class DateParser {
/**
* A cache for month names
* @see #getMonthNames(Locale)
*/
private static Map<Locale, Map<String, Integer>> MONTH_NAMES_CACHE =
new ConcurrentHashMap<>();
/**
* Converts a given date string to a {@link CSLDate} object. Does
* not parse the string but saves it in the CSLDate's raw field.
* @param dateString the string
* @return the {@link CSLDate} object
*/
public static CSLDate toDate(String dateString) {
return new CSLDateBuilder().raw(dateString).build();
}
/**
* Parses the given year and month to a {@link CSLDate} object. Handles
* date ranges such as <code>xx-xx</code> or <code>xx/xx</code> and even
* <code>xx-xx/yy-yy</code>.
* @param year the year to parse. Should be a four-digit number or a String
* whose last four characters are digits.
* @param month the month to parse. May be a number (<code>1-12</code>),
* a short month name (<code>Jan</code> to <code>Dec</code>), or a
* long month name (<code>January</code> to <code>December</code>). This
* method is also able to recognize month names in several locales.
* @return the {@link CSLDate} object or null if both, the year and the
* month, could not be parsed
*/
public static CSLDate toDate(String year, String month) {
//check if there are several dates, parse each of them
//individually and merge them afterwards
String[] ms = null;
if (month != null) {
ms = month.split("/");
}
String[] ys = null;
if (year != null) {
ys = year.split("/");
}
if (ys != null && ys.length > 1) {
//even if there is a month parse year only to avoid ambiguities
CSLDate d1 = toDateRange(ys[0], null);
CSLDate d2 = toDateRange(ys[ys.length - 1], null);
//only merge if the difference between the years is not greater than 1
if (d1.getDateParts() != null && d2.getDateParts() != null &&
d1.getDateParts().length > 0 && d2.getDateParts().length > 0 &&
d1.getDateParts()[0].length > 0 && d2.getDateParts()[d2.getDateParts().length - 1].length > 0 &&
Math.abs(d2.getDateParts()[0][0] - d1.getDateParts()[d2.getDateParts().length - 1][0]) <= 1) {
return merge(d1, d2);
}
} else if (ms != null && ms.length > 1) {
CSLDate d1 = toDateRange(year, ms[0]);
CSLDate d2 = toDateRange(year, ms[1]);
//only merge if the difference between the months is not greater than 1
if (d1.getDateParts() != null && d2.getDateParts() != null &&
d1.getDateParts().length > 0 && d2.getDateParts().length > 0 &&
d1.getDateParts()[0].length > 1 && d2.getDateParts()[d2.getDateParts().length - 1].length > 1 &&
Math.abs(d2.getDateParts()[0][1] - d1.getDateParts()[d2.getDateParts().length - 1][1]) <= 1) {
return merge(d1, d2);
}
}
return toDateRange(year, month);
}
/**
* Parses the given year and month to a {@link CSLDate} object. Handles
* date ranges such as <code>xx-xx</code>.
* @param year the year to parse. Should be a four-digit number or a String
* whose last four characters are digits.
* @param month the month to parse. May be a number (<code>1-12</code>),
* a short month name (<code>Jan</code> to <code>Dec</code>), or a
* long month name (<code>January</code> to <code>December</code>). This
* method is also able to recognize month names in several locales.
* @return the {@link CSLDate} object or null if both, the year and the
* month, could not be parsed
*/
public static CSLDate toDateRange(String year, String month) {
//check if there's a date range, parse elements
//individually and merge them afterwards
String[] ms = null;
if (month != null) {
ms = month.split("-+|\u2013+");
}
String[] ys = null;
if (year != null) {
ys = year.split("-+|\u2013+");
}
if (ys != null && ys.length > 1) {
//even if there is a month parse year only to avoid ambiguities
CSLDate d1 = toDateSingle(ys[0], null);
CSLDate d2 = toDateSingle(ys[ys.length - 1], null);
return merge(d1, d2);
} else if (ms != null && ms.length > 1) {
CSLDate d1 = toDateSingle(year, ms[0]);
CSLDate d2 = toDateSingle(year, ms[1]);
return merge(d1, d2);
}
return toDateSingle(year, month);
}
/**
* Parses the given year and month to a {@link CSLDate} object. Does not
* handle ranges.
* @param year the year to parse. Should be a four-digit number or a String
* whose last four characters are digits.
* @param month the month to parse. May be a number (<code>1-12</code>),
* a short month name (<code>Jan</code> to <code>Dec</code>), or a
* long month name (<code>January</code> to <code>December</code>). This
* method is also able to recognize month names in several locales.
* @return the {@link CSLDate} object or null if both, the year and the
* month, could not be parsed
*/
public static CSLDate toDateSingle(String year, String month) {
int m = toMonth(month);
//parse year
int y = -1;
Boolean circa = null;
if (year != null && year.length() >= 4) {
if (StringUtils.isNumeric(year)) {
y = Integer.parseInt(year);
} else {
String fourDigit = year.substring(year.length() - 4);
if (StringUtils.isNumeric(fourDigit)) {
y = Integer.parseInt(fourDigit);
if (year.length() > 4) {
circa = Boolean.TRUE;
}
}
}
}
//create result
CSLDateBuilder builder = new CSLDateBuilder();
if (y < 0) {
return null;
}
if (m < 0) {
return builder.dateParts(y).circa(circa).build();
}
return builder.dateParts(y, m).circa(circa).build();
}
/**
* Merges two dates
* @param d1 the first date
* @param d2 the second date
* @return the merged date
*/
private static CSLDate merge(CSLDate d1, CSLDate d2) {
if (d1 == null) {
return d2;
} else if (d2 == null) {
return d1;
}
CSLDateBuilder builder = new CSLDateBuilder();
//handle date parts
builder.dateParts(d1.getDateParts()[0], d2.getDateParts()[d2.getDateParts().length - 1]);
//handle circa
if (d1.getCirca() != null) {
builder.circa(d1.getCirca());
}
if (d2.getCirca() != null && (d1.getCirca() == null || d2.getCirca().booleanValue())) {
builder.circa(d2.getCirca());
}
//handle literal strings
if (d1.getLiteral() != null) {
builder.literal(d1.getLiteral());
}
if (d2.getLiteral() != null) {
if (d1.getLiteral() != null) {
builder.literal(d1.getLiteral() + "-" + d2.getLiteral());
} else {
builder.literal(d2.getLiteral());
}
}
//handle seasons
if (d1.getSeason() != null) {
builder.season(d1.getSeason());
}
if (d2.getSeason() != null) {
if (d1.getSeason() != null) {
builder.season(d1.getSeason() + "-" + d2.getSeason());
} else {
builder.season(d2.getSeason());
}
}
//handle raw strings
if (d1.getRaw() != null) {
builder.raw(d1.getRaw());
}
if (d2.getRaw() != null) {
if (d1.getRaw() != null) {
builder.raw(d1.getRaw() + "-" + d2.getRaw());
} else {
builder.raw(d2.getRaw());
}
}
return builder.build();
}
/**
* Parses the given month string
* @param month the month to parse. May be a number (<code>1-12</code>),
* a short month name (<code>Jan</code> to <code>Dec</code>), or a
* long month name (<code>January</code> to <code>December</code>). This
* method is also able to recognize month names in several locales.
* @return the month's number (<code>1-12</code>) or <code>-1</code> if
* the string could not be parsed
*/
public static int toMonth(String month) {
int m = -1;
if (month != null && !month.isEmpty()) {
if (StringUtils.isNumeric(month)) {
m = Integer.parseInt(month);
if (m < 1 || m > 12) {
//invalid month
m = -1;
}
} else {
m = tryParseMonth(month, Locale.ENGLISH);
if (m <= 0) {
m = tryParseMonth(month, Locale.getDefault());
if (m <= 0) {
for (Locale l : Locale.getAvailableLocales()) {
m = tryParseMonth(month, l);
if (m > 0) {
break;
}
}
}
}
}
}
return m;
}
/**
* Retrieves and caches a list of month names for a given locale
* @param locale the locale
* @return the list of month names (short and long). All names are
* converted to upper case
*/
private static Map<String, Integer> getMonthNames(Locale locale) {
Map<String, Integer> r = MONTH_NAMES_CACHE.get(locale);
if (r == null) {
DateFormatSymbols symbols = DateFormatSymbols.getInstance(locale);
r = new HashMap<>(24);
//insert long month names
String[] months = symbols.getMonths();
for (int i = 0; i < months.length; ++i) {
String m = months[i];
if (!m.isEmpty()) {
r.put(m.toUpperCase(), i + 1);
}
}
//insert short month names
String[] shortMonths = symbols.getShortMonths();
for (int i = 0; i < shortMonths.length; ++i) {
String m = shortMonths[i];
if (!m.isEmpty()) {
r.put(m.toUpperCase(), i + 1);
}
}
MONTH_NAMES_CACHE.put(locale, r);
}
return r;
}
/**
* Tries to parse the given month string using the month names
* of the given locale
* @param month the month string
* @param locale the locale
* @return the month's number (<code>1-12</code>) or <code>-1</code> if
* the string could not be parsed
*/
private static int tryParseMonth(String month, Locale locale) {
Map<String, Integer> names = getMonthNames(locale);
Integer r = names.get(month.toUpperCase());
if (r != null) {
return r;
}
return -1;
}
}