package gov.nysenate.openleg.processor.hearing;
import gov.nysenate.openleg.util.PublicHearingTextUtils;

import org.springframework.stereotype.Service;

import java.time.LocalDate;
import java.time.LocalTime;
import java.time.format.DateTimeFormatter;
import java.time.format.DateTimeParseException;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Extracts the date, start time and end time of a public hearing from the
 * first page of its transcript.
 *
 * <p>Three layouts are recognised:
 * <ul>
 *   <li>a line holding only a date ("April 5, 2014"), optionally followed by
 *       a line containing the time(s);</li>
 *   <li>a line holding a weekday and a date ("Tuesday, April 5, 2014"),
 *       optionally followed by a line containing the time(s);</li>
 *   <li>a single line holding both ("March 12, 2014, at 10:00 a.m.").</li>
 * </ul>
 *
 * <p>All patterns and formatters are immutable constants; month names and
 * AM/PM markers are always parsed as US English so the result does not depend
 * on the JVM default locale.
 */
@Service
public class PublicHearingDateParser
{
    /** Matches a start time such as "10:00 a.m." anywhere in a line. */
    private static final Pattern START_TIME = Pattern.compile("\\d+:\\d{2} [ap].m.");

    /** Matches the normalised "date [at] startTime [to endTime]" string built by this class. */
    private static final Pattern DATE_TIME = Pattern.compile("(?<date>\\w+ \\d{1,2}, \\d{4})(( at)? " +
            "(?<startTime>\\d{1,2}:\\d{2} [ap].m.)" +
            "( to (?<endTime>\\d{1,2}:\\d{2} [ap].m.))?)?");

    /** Matches a whole line of the form "March 12, 2014, at 10:00 a.m." */
    private static final Pattern SINGLE_LINE_DATE_TIME =
            Pattern.compile("(\\w+ \\d+, \\d+)(, at \\d+:\\d+ [apm.]{4})");

    private static final DateTimeFormatter DATE_FORMATTER =
            DateTimeFormatter.ofPattern("MMMM d, yyyy", Locale.US);
    private static final DateTimeFormatter DAY_OF_WEEK_DATE_FORMATTER =
            DateTimeFormatter.ofPattern("EEEE, MMMM d, yyyy", Locale.US);
    private static final DateTimeFormatter TIME_FORMATTER =
            DateTimeFormatter.ofPattern("h:mm a", Locale.US);

    /** The character (code point 65533, U+FFFD) that is rewritten to "to" in time ranges. */
    private static final String REPLACEMENT_CHARACTER = String.valueOf((char) 65533);

    private static final String START_TIME_GROUP = "startTime";
    private static final String END_TIME_GROUP = "endTime";

    /**
     * Extract a LocalDate from the first page of a PublicHearing.
     * @param firstPage list of Strings containing the first page of text.
     * @return LocalDate the date of this public hearing.
     * @throws DateTimeParseException if no date can be found on the page, or the
     *         text that was found is not a valid date.
     */
    public LocalDate parseDate(List<String> firstPage) {
        String dateTime = getDateTimeString(firstPage);
        Matcher matcher = matchDateTime(dateTime);
        if (matcher == null) {
            throw new DateTimeParseException(
                    "No public hearing date found on the first page",
                    dateTime == null ? "" : dateTime, 0);
        }
        return LocalDate.parse(matcher.group("date"), DATE_FORMATTER);
    }

    /**
     * Extract the start time from the first page of a PublicHearing.
     * @param firstPage list of Strings containing the first page of text.
     * @return the start time, or null if the page does not state one.
     */
    public LocalTime parseStartTime(List<String> firstPage) {
        return parseTime(firstPage, START_TIME_GROUP);
    }

    /**
     * Extract the end time from the first page of a PublicHearing.
     * @param firstPage list of Strings containing the first page of text.
     * @return the end time, or null if the page does not state one.
     */
    public LocalTime parseEndTime(List<String> firstPage) {
        return parseTime(firstPage, END_TIME_GROUP);
    }

    /**
     * Parses the time captured by the given named group of {@link #DATE_TIME}.
     * @return the parsed time, or null when no date line or no such time exists.
     */
    private LocalTime parseTime(List<String> firstPage, String groupName) {
        Matcher matcher = matchDateTime(getDateTimeString(firstPage));
        if (matcher == null) {
            return null;
        }
        String time = matcher.group(groupName);
        if (time == null) {
            return null;
        }
        return LocalTime.parse(formatAmPm(time), TIME_FORMATTER);
    }

    /**
     * Applies {@link #DATE_TIME} to the given text.
     * @return a Matcher positioned on a successful match, or null if the text is
     *         null or does not contain a date.
     */
    private Matcher matchDateTime(String dateTime) {
        if (dateTime == null) {
            return null;
        }
        Matcher matcher = DATE_TIME.matcher(dateTime);
        return matcher.find() ? matcher : null;
    }

    /**
     * Finds the Strings containing date and time information.
     * Concatenates these Strings into
     * a "<code>MMMM d, yyyy h:mm a to h:mm a</code>" formatted single String.
     * @param firstPage list of Strings containing the first page of text.
     * @return A String containing date time information, or null if no line
     *         of the page holds a date.
     */
    private String getDateTimeString(List<String> firstPage) {
        List<String> lines = formatLines(firstPage);
        for (int i = 0; i < lines.size(); i++) {
            String line = lines.get(i);
            if (containsDate(line)) {
                return appendTime(line, lines, i + 1);
            }
            if (containsDayOfWeekAndDate(line)) {
                // Remove the weekday.
                return appendTime(line.replaceFirst("\\w+, ", ""), lines, i + 1);
            }
            if (containsDateAndTime(line)) {
                //March 12, 2014, at 10:00 a.m.
                return line.replaceFirst(", at", "");
            }
        }
        return null;
    }

    /**
     * Appends the time line that follows a date line, if there is one.
     * The date is returned unchanged when it was the last line of the page or
     * when the following line holds no time.
     */
    private String appendTime(String date, List<String> lines, int timeLineIndex) {
        if (timeLineIndex >= lines.size()) {
            return date;
        }
        String time = getTimeString(lines.get(timeLineIndex));
        return time == null ? date : date + " " + time;
    }

    /** Returns the String containing time information.
     * If no time exists return null.*/
    private String getTimeString(String line) {
        return START_TIME.matcher(line).find() ? line : null;
    }

    /**
     * Determines if the given String is exactly a date.
     * Matches date Strings like: April 5, 2014.
     */
    private boolean containsDate(String line) {
        return parsesWith(DATE_FORMATTER, line);
    }

    /**
     * Determines if the given String is exactly a weekday followed by a date.
     * Matches date Strings like: Tuesday, April 5, 2014.
     */
    private boolean containsDayOfWeekAndDate(String line) {
        return parsesWith(DAY_OF_WEEK_DATE_FORMATTER, line);
    }

    /**
     * Determines if the given String is exactly a date followed by a time.
     * Matches date Strings like: March 12, 2014, at 10:00 a.m.
     */
    private boolean containsDateAndTime(String line) {
        return SINGLE_LINE_DATE_TIME.matcher(line).matches();
    }

    /** Returns true if the whole line can be parsed by the given formatter. */
    private boolean parsesWith(DateTimeFormatter formatter, String line) {
        try {
            formatter.parse(line);
            return true;
        }
        catch (DateTimeParseException ignored) {
            // Most lines of the page are not dates; a failed parse is the expected "no" answer.
            return false;
        }
    }

    /** Removes Line numbers, excess whitespace, new line, and non text characters */
    private List<String> formatLines(List<String> lines) {
        List<String> formattedLines = new ArrayList<>(lines.size());
        for (String line : lines) {
            line = removeLineNumbers(line);
            line = removeNewLineCharacters(line);
            line = removeBadCharacters(line);
            formattedLines.add(line);
        }
        return formattedLines;
    }

    private String removeLineNumbers(String line) {
        return PublicHearingTextUtils.stripLineNumber(line);
    }

    private String removeNewLineCharacters(String line) {
        return line.replace("\n", "");
    }

    /** Rewrites the character with code 65533 to "to" and drops "- " sequences. */
    private String removeBadCharacters(String line) {
        return line.replace(REPLACEMENT_CHARACTER, "to")
                   .replace("- ", "");
    }

    /**
     * Capitalize a.m./p.m and remove the all '.' characters.
     * The input is a time captured by {@link #DATE_TIME} (digits, ':', a space
     * and the a.m./p.m. marker), so upper-casing the whole value only affects
     * the marker.
     */
    private String formatAmPm(String dateTime) {
        return dateTime.toUpperCase(Locale.ROOT).replace(".", "");
    }
}