package org.activityinfo.core.shared.type.converter;
import com.bedatadriven.rebar.time.calendar.LocalDate;
import javax.annotation.Nonnull;
import java.util.Arrays;
/**
 * Parses free-form date strings into {@link LocalDate} values.
 *
 * <p>The input is expected to contain three components (day, month and year, in
 * some order) separated by any of the characters in {@code SEPARATORS}. The month
 * may be given as a number or as a month name in one of several Latin-script
 * languages; the year may be given with four digits or abbreviated to two.
 * Where the order of the components is ambiguous, a best guess is made.
 */
public class StringToLocalDateConverter implements StringConverter<LocalDate> {

    /** Characters that may separate the components of a date. */
    private static final String SEPARATORS = "-/\\, \t.";

    /** Number of components (day, month, year) that make up a date. */
    public static final int NUM_COMPONENTS = 3;

    private static final int JAN = 1;
    private static final int FEB = 2;
    private static final int MARCH = 3;
    private static final int APRIL = 4;
    private static final int MAY = 5;
    private static final int JUNE = 6;
    private static final int JULY = 7;
    private static final int AUG = 8;
    private static final int SEPT = 9;
    private static final int OCT = 10;
    private static final int NOV = 11;
    private static final int DEC = 12;

    /** Two-digit years below this value are placed in the 2000s, all others in the 1900s. */
    private static final int PIVOT_YEAR = 50;

    /** Words must be strictly longer than this to be considered as a month name. */
    private static final int MIN_MONTH_NAME_LENGTH = 2;

    /** Marker returned by the lookup helpers when nothing matched. */
    public static final int NOT_FOUND = -1;

    /**
     * Converts the given string to a date.
     *
     * @param string the text to parse
     * @return the parsed date
     * @throws IllegalArgumentException if fewer than three components can be found,
     *         or if the year appears between two purely numeric components
     */
    @Nonnull
    @Override
    public LocalDate convert(@Nonnull String string) {

        // basically we expect three components, in SOME format
        // that are separated by something normal
        int[] components = new int[NUM_COMPONENTS];
        int charIndex = 0;
        int component = 0;

        // if we stumble across the month name,
        // make sure we note this.
        int monthIndex = NOT_FOUND;

        while (component < NUM_COMPONENTS) {

            if (charIndex >= string.length()) {
                throw new IllegalArgumentException("Not enough components in '" + string + "', found: " +
                        Arrays.toString(components));
            }

            int start = charIndex;
            char c = string.charAt(start);

            if (Character.isDigit(c)) {
                // read all the digits in
                do {
                    charIndex++;
                } while (isDigit(string, charIndex));

                // parse as number
                components[component] = Integer.parseInt(string.substring(start, charIndex));

                // move on the next component
                component++;

            } else {
                // read to the end of the word; the do-while always consumes at
                // least one character, so a stray symbol cannot stall the loop
                do {
                    charIndex++;
                } while (isPartOfWord(string, charIndex));

                // short words (for example ordinal suffixes such as "st" or "th")
                // and words that do not look like a month name are skipped
                int monthNameLength = charIndex - start;
                if (monthNameLength > MIN_MONTH_NAME_LENGTH) {
                    int month = tryParseLatinMonthName(string.substring(start, charIndex));
                    if (month != NOT_FOUND) {
                        components[component] = month;
                        monthIndex = component;
                        component++;
                    }
                }
            }

            // advance through any separator chars
            while (isSeparator(string, charIndex)) {
                charIndex++;
            }
        }

        if (monthIndex != NOT_FOUND) {
            return parseUsingKnownMonthPosition(components, monthIndex);
        }

        // try to find the obvious year
        int yearIndex = findYearIndex(components, NOT_FOUND);
        if (yearIndex == NOT_FOUND) {
            // if we can't find an unmistakable year, we can only assume that it comes at
            // the end in some completely ambiguous form like 3/4/12
            yearIndex = 2;
        }
        return parseUsingKnownYearPosition(string, components, yearIndex);
    }

    /**
     * Builds a date from three numeric components once the position of the year is decided.
     * The preferred month/day order is used if it is plausible, otherwise the two are swapped.
     *
     * @param string the original input, used for the error message only
     * @param components the three parsed numbers
     * @param yearIndex the index of the year within {@code components}
     * @throws IllegalArgumentException if the year is the middle component
     */
    private LocalDate parseUsingKnownYearPosition(String string, int[] components, int yearIndex) {
        if (yearIndex == 0) {
            // usually YYYY-MM-dd
            if (monthAndDayMatch(components, 1, 2)) {
                return toDate(components, yearIndex, 1, 2);
            } else {
                return toDate(components, yearIndex, 2, 1);
            }
        } else if (yearIndex == 1) {
            // year in the middle?? 31-2000-12 ?? i don't think so...
            throw new IllegalArgumentException(string);
        } else {
            // the classic ambiguous 5/3/2007: prefer day/month, fall back to month/day
            if (monthAndDayMatch(components, 1, 0)) {
                return toDate(components, yearIndex, 1, 0);
            } else {
                return toDate(components, yearIndex, 0, 1);
            }
        }
    }

    /**
     * Checks whether the given assignment of month and day is plausible: the month must
     * not exceed 12 and the day must not exceed the maximum number of days in that month.
     */
    private boolean monthAndDayMatch(int[] components, int monthIndex, int dayIndex) {
        int month = components[monthIndex];
        if (month > DEC) {
            return false;
        }
        return components[dayIndex] <= getMaxDaysInMonth(month);
    }

    /**
     * Builds a date when the month was recognised by name, so that only the year and
     * the day remain to be told apart.
     *
     * @param components the three parsed components
     * @param monthIndex the index of the named month within {@code components}
     */
    private LocalDate parseUsingKnownMonthPosition(int[] components, int monthIndex) {
        int yearIndex = findYearIndex(components, monthIndex);
        if (yearIndex == NOT_FOUND) {
            // best guess: with the month in the middle it is usually "31st May 12",
            // otherwise who knows... assume the year is the middle component
            yearIndex = (monthIndex == 1) ? 2 : 1;
        }
        // the day is whichever component is neither the month nor the year; it must
        // never be the month's own index
        int dayIndex = remainingIndex(monthIndex, yearIndex);

        // go through toDate() so that abbreviated years such as "5 May 99" get a century
        return toDate(components, yearIndex, monthIndex, dayIndex);
    }

    /**
     * Creates the date from the components at the given positions, expanding a
     * two-digit year to a full year first.
     */
    private LocalDate toDate(int[] components, int yearIndex, int monthIndex, int dayIndex) {
        int year = components[yearIndex];
        if (isTwoDigits(year)) {
            year += inferCentury(year);
        }
        return new LocalDate(year, components[monthIndex], components[dayIndex]);
    }

    /**
     * @return the century to add to an abbreviated year: 2000 for years below
     *         {@code PIVOT_YEAR}, otherwise 1900
     */
    private int inferCentury(int year) {
        if (year < PIVOT_YEAR) {
            return 2000;
        } else {
            return 1900;
        }
    }

    /**
     * @return true if the year is an abbreviated (at most two-digit) year that needs a
     *         century added. Three-digit values are left untouched rather than being
     *         shifted by a century.
     */
    private boolean isTwoDigits(int year) {
        return year < 100;
    }

    /**
     * @return the one component index that is neither {@code monthIndex} nor {@code yearIndex}
     * @throws IllegalStateException if no such index exists
     */
    private int remainingIndex(int monthIndex, int yearIndex) {
        for (int i = 0; i != NUM_COMPONENTS; ++i) {
            if (i != monthIndex && i != yearIndex) {
                return i;
            }
        }
        throw new IllegalStateException();
    }

    /**
     * Looks for a component that is too large to be a day of the month and
     * must therefore be the year.
     *
     * @param components the three parsed components
     * @param monthIndex the index of the month if already known, or {@code NOT_FOUND}
     * @return the index of the first such component, or {@code NOT_FOUND}
     */
    private int findYearIndex(int[] components, int monthIndex) {
        int maxDaysInThisMonth = (monthIndex == NOT_FOUND) ? 31 : getMaxDaysInMonth(components[monthIndex]);
        for (int i = 0; i != NUM_COMPONENTS; ++i) {
            if (i != monthIndex && components[i] > maxDaysInThisMonth) {
                return i;
            }
        }
        return NOT_FOUND;
    }

    /**
     * @return the largest day number the given month can have; February is
     *         treated as having 29 days because the year is not considered here
     */
    private int getMaxDaysInMonth(int month) {
        if (month == FEB) {
            return 29;
        } else if (month == APRIL || month == JUNE || month == SEPT || month == NOV) {
            return 30;
        } else {
            return 31;
        }
    }

    /**
     * Tries to parse a string as a month name in one of several Latin-script
     * languages using a series of hand-tuned heuristics based on the first
     * letter and the presence of a few distinguishing letters.
     *
     * @param string the word, which must be longer than {@code MIN_MONTH_NAME_LENGTH}
     * @return a month number, 1-12, or {@code NOT_FOUND} if there is no match
     */
    private int tryParseLatinMonthName(String string) {
        String lowered = string.toLowerCase();
        switch (lowered.charAt(0)) {
            case 'e':
                return JAN; // enero
            case 'f':
                return FEB; // february, febrero
            case 'm':
                if (hasAny(lowered, 'y', 'i', 'g')) {
                    return MAY; // may, maio, mei, mayo, mag (it)
                } else {
                    return MARCH; // march, marzo, marco, marz
                }
            case 'a':
                if (hasAny(lowered, 'g', 'o')) {
                    return AUG;
                } else if (hasAny(lowered, 'b', 'p', 'v')) {
                    return APRIL;
                }
                break;
            case 'i':
            case 'j':
                if (hasChar(lowered, 'a')) {
                    return JAN; // january, januar, januari,
                } else if (hasChar(lowered, 'n')) {
                    return JUNE; // june, juni, junio
                } else if (hasAny(lowered, 'i', 'y', 'l')) {
                    return JULY; // july, julio, juli
                }
                break;
            case 'l':
                if (lowered.charAt(1) == 'u') {
                    return JULY; // luglio (it)
                }
                // any other word starting with 'l' is not a month; do not
                // fall through to the 's' case
                break;
            case 's':
                return SEPT; // september, septiembre, etc
            case 'o':
                return OCT; // oktober, october, octubre
            case 'n':
                if (hasChar(lowered, 'v')) {
                    return NOV; // november, noviembre,
                }
                break;
            case 'd':
                return DEC;
            case 'g':
                // italian
                if (hasChar(lowered, 'e')) {
                    return JAN; // genn
                } else if (hasChar(lowered, 'u')) {
                    return JUNE; // giugno
                }
                break;
            default:
                break;
        }
        return NOT_FOUND;
    }

    /** @return true if {@code lowered} contains at least one of the three characters */
    private boolean hasAny(String lowered, char i, char b, char c) {
        return lowered.indexOf(i) != -1 || lowered.indexOf(b) != -1 || lowered.indexOf(c) != -1;
    }

    /** @return true if {@code lowered} contains the character */
    private boolean hasChar(String lowered, char a) {
        return lowered.indexOf(a) != -1;
    }

    /** @return true if {@code lowered} contains at least one of the two characters */
    private boolean hasAny(String lowered, char a, char b) {
        return lowered.indexOf(a) != -1 || lowered.indexOf(b) != -1;
    }

    /** @return true if {@code charIndex} is within the string and points at a separator */
    private boolean isSeparator(String string, int charIndex) {
        return charIndex < string.length()
                && SEPARATORS.indexOf(string.charAt(charIndex)) != -1;
    }

    /** @return true if {@code charIndex} is within the string and points at a digit */
    private boolean isDigit(String string, int charIndex) {
        return charIndex < string.length()
                && Character.isDigit(string.charAt(charIndex));
    }

    /** @return true if {@code charIndex} is within the string and points at a letter */
    private boolean isPartOfWord(String string, int charIndex) {
        return charIndex < string.length()
                && Character.isLetter(string.charAt(charIndex));
    }
}