package gov.nysenate.openleg.processor.daybreak;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Maps;
import gov.nysenate.openleg.model.base.Version;
import gov.nysenate.openleg.model.bill.BillAction;
import gov.nysenate.openleg.model.bill.BillId;
import gov.nysenate.openleg.model.spotcheck.daybreak.DaybreakBill;
import gov.nysenate.openleg.model.spotcheck.daybreak.DaybreakBillAmendment;
import gov.nysenate.openleg.model.spotcheck.daybreak.DaybreakFragment;
import gov.nysenate.openleg.model.spotcheck.daybreak.PageFileEntry;
import gov.nysenate.openleg.model.entity.Chamber;
import gov.nysenate.openleg.util.DateUtils;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.text.SimpleDateFormat;
import java.time.LocalDate;
import java.time.format.DateTimeParseException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
/**
* Contains a method that parses a daybreak fragment into a daybreak bill
*/
public class DaybreakFragmentParser {
private static Logger logger = LoggerFactory.getLogger(DaybreakFragmentParser.class);
/** Patterns for extracting metadata from bill actions */
private static Pattern billActionPattern = Pattern.compile("(\\d{2}/\\d{2}/\\d{2}) (.*)");
private static SimpleDateFormat billActionDateFormat = new SimpleDateFormat("MM/dd/yy");
/** Pattern for detecting short names that contain the member's first initial */
private static String shortNameInitialRegex = "([A-Z])\\. ([A-Za-z\\-' ]*)";
/** Pattern for extracting sponsors from Rules sponsors */
private static Pattern rulesSponsorPattern = Pattern.compile("RULES COM \\(Request of ([A-Za-z\\-\\.', ]*)\\)");
/**
* Parses the text of a daybreak fragment into the fields of a daybreak bill
* @param daybreakFragment
* @return
*/
public static DaybreakBill extractDaybreakBill(DaybreakFragment daybreakFragment){
// Set active billid from the daybreak fragment
DaybreakBill daybreakBill = new DaybreakBill(daybreakFragment.getDaybreakBillId());
daybreakBill.setActiveVersion(daybreakFragment.getBillId().getVersion());
// Split the fragment text into lines
String[] fragmentParts = daybreakFragment.getDaybreakText().split("\\n");
// Parse each fragment line accordingly
parseSponsors(daybreakBill, fragmentParts[1]);
daybreakBill.setTitle(fragmentParts[2]);
daybreakBill.setLawSection(fragmentParts[3]);
daybreakBill.setLawCodeAndSummary(fragmentParts[4].replaceAll("BILL SUMMARY NOT FOUND", ""));
parseActions(daybreakBill, Arrays.copyOfRange(fragmentParts, 5, fragmentParts.length));
// Convert the page file entries into amendments
if (daybreakFragment.getPageFileEntries() == null) {
// If bill is not in page file. Set it to an empty map.
daybreakBill.setAmendments(ImmutableMap.of());
} else {
// Otherwise include all amendments in the page file.
daybreakBill.setAmendments(parsePageFileEntries(daybreakFragment.getPageFileEntries()));
}
return daybreakBill;
}
/**
* Given a line containing sponsor data, calls the correct parser depending on the bill's chamber
* @param daybreakBill
* @param sponsorLine
*/
private static void parseSponsors(DaybreakBill daybreakBill, String sponsorLine){
if(sponsorLine.startsWith("RULES")){
parseRulesSponsors(daybreakBill, sponsorLine);
}
else {
switch (daybreakBill.getBaseBillId().getChamber()) {
case SENATE:
parseSenateSponsors(daybreakBill, sponsorLine); break;
case ASSEMBLY:
parseAssemblySponsors(daybreakBill, sponsorLine); break;
}
}
}
/**
* Parses sponsors for senate data, which is divided into primary sponsor and cosponsors
* @param daybreakBill
* @param sponsorLine
*/
private static void parseSenateSponsors(DaybreakBill daybreakBill, String sponsorLine) {
String[] sponsorsByType = sponsorLine.split("CO:");
daybreakBill.setSponsor(formatShortName(sponsorsByType[0]));
if(sponsorsByType.length > 1) {
daybreakBill.setCosponsors(parseCSVSponsors(sponsorsByType[1]));
}
}
/**
* Parses sponsors for assembly data, which consists of a primary sponsor, cosponsors and multisponsors
* @param daybreakBill
* @param sponsorLine
*/
private static void parseAssemblySponsors(DaybreakBill daybreakBill, String sponsorLine){
String[] sponsorsByType = sponsorLine.split("; M-S:");
// Get the primary sponsor and co sponsors as one list
List<String> sponsors = parseCSVSponsors(sponsorsByType[0]);
daybreakBill.setSponsor(sponsors.remove(0)); // remove the primary sponsor and set it as the daybreak bill sponsor
daybreakBill.setCosponsors(sponsors);
if(sponsorsByType.length > 1){
daybreakBill.setMultiSponsors(parseCSVSponsors(sponsorsByType[1]));
}
}
/**
* Parses sponsors when the sponsor is a Rules committee
* @param daybreakBill
* @param sponsorLine
*/
private static void parseRulesSponsors(DaybreakBill daybreakBill, String sponsorLine){
Matcher rulesSponsorMatcher = rulesSponsorPattern.matcher(sponsorLine);
if(rulesSponsorMatcher.matches()){
List<String> sponsors = parseCSVSponsors(rulesSponsorMatcher.group(1));
daybreakBill.setSponsor("RULES (" + sponsors.remove(0) + ")"); //Set the first sponsor as the main sponsor
daybreakBill.setCosponsors(sponsors);
}
else {
daybreakBill.setSponsor("RULES");
}
}
/**
* Ensures that certain sponsors' shortnames are stored in the proper format. Im looking at you P.�Lopez
* @param rawShortName
* @return
*/
private static String formatShortName(String rawShortName){
return rawShortName.trim().replaceAll(shortNameInitialRegex, "$2 $1");
}
/**
* Parses a comma separated value string, used to extract sponsors from a csv list
* @param csvString
* @return
*/
private static List<String> parseCSVSponsors(String csvString){
return Arrays.asList(csvString.split(",")).stream()
.map(DaybreakFragmentParser::formatShortName)
.collect(Collectors.toList());
}
/**
* Parses the action lines of a daybreak fragment, yielding a list of BillActions for a daybreak bill
* @param daybreakBill
* @param actionLines
*/
private static void parseActions(DaybreakBill daybreakBill, String[] actionLines){
List<BillAction> billActions = new ArrayList<>();
int sequenceNo = 0; // Sequence no for the action
for(int i = 0; i<actionLines.length; i++){
// Check to see if it is a valid action line
Matcher billActionMatcher = billActionPattern.matcher(actionLines[i]);
if(billActionMatcher.matches()) {
try {
// Get BillAction fields from the match
LocalDate actionDate = LocalDate.from(DateUtils.LRS_ACTIONS_DATE.parse(billActionMatcher.group(1)));
String actionText = billActionMatcher.group(2);
Chamber actionChamber = StringUtils.isAllUpperCase(actionText.replaceAll("[^a-zA-Z]+", "")) ?
Chamber.SENATE : Chamber.ASSEMBLY ;
billActions.add(
new BillAction(actionDate, actionText, actionChamber, ++sequenceNo, daybreakBill.getBaseBillId())
);
} catch (DateTimeParseException ex) {
logger.error("Could not parse date " + billActionMatcher.group(1) + " for " + daybreakBill.getDaybreakBillId());
logger.error(ex.getMessage());
}
}
else if (billActions.size() > 0) { // If an invalid line is detected after actions have been read, stop
break;
}
}
daybreakBill.setActions(billActions);
}
/**
* Converts the map of page file entries found in a daybreak fragment
* into a map of versions to daybreak bill amendments for use in a daybreak bill
* @param pageFileEntries
* @return
*/
private static Map<Version, DaybreakBillAmendment> parsePageFileEntries(Map<BillId, PageFileEntry> pageFileEntries){
List<DaybreakBillAmendment> amendments = pageFileEntries.entrySet().stream()
.map(entry -> parsePageFileEntry(entry.getKey(), entry.getValue()))
.collect(Collectors.toList());
return Maps.uniqueIndex(amendments, amend -> amend.getBillId().getVersion());
}
/**
* Creates a DaybreakBillAmendment from the fields of a PageFileEntry
* @param billId
* @param pageFileEntry
* @return
*/
private static DaybreakBillAmendment parsePageFileEntry(BillId billId, PageFileEntry pageFileEntry){
return new DaybreakBillAmendment(
billId,
billId.getChamber()== Chamber.SENATE ?
pageFileEntry.getAssemblyBillId() :
pageFileEntry.getSenateBillId(),
pageFileEntry.getPageCount(),
pageFileEntry.getPublishedDate()
);
}
}