package gov.nysenate.openleg.processor.hearing;
import gov.nysenate.openleg.model.entity.Chamber;
import gov.nysenate.openleg.model.hearing.PublicHearingCommittee;
import gov.nysenate.openleg.util.PublicHearingTextUtils;
import org.springframework.stereotype.Service;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Extracts {@link PublicHearingCommittee} information from the committee text found at the
 * top of the first page of a public hearing.
 */
@Service
public class PublicHearingCommitteeParser
{
    /**
     * Matches a divider line: optional whitespace, an optional leading number, then nothing
     * but "-" characters. Such lines divide up content on the page.
     */
    private static final Pattern SEPARATOR = Pattern.compile("^\\s*(\\d+)?\\s*-+$");

    /** Detects whether any committee heading is present in a block of text. */
    private static final Pattern CHECK_FOR_COMMITTEES = Pattern.compile("BEFORE THE NEW YORK STATE (SENATE|ASSEMBLY)");

    /**
     * Splits a committee block immediately before each "AND [THE] SENATE|ASSEMBLY",
     * consuming a preceding ", " when one is present.
     */
    private static final Pattern COMMITTEE_SPLIT = Pattern.compile("(, )?(?=AND(( THE)? SENATE|( THE)? ASSEMBLY))");

    /** Captures the chamber and name of the first committee listed in a block. */
    private static final Pattern FIRST_COMMITTEE = Pattern.compile(
            "BEFORE THE NEW YORK STATE (?<chamber>SENATE|ASSEMBLY) ?(MAJORITY COALITION JOINT TASK FORCE ON |STANDING COMMITTEE ON )?(?<name>.*)");

    /** Captures the chamber and name of any committee listed after the first one. */
    private static final Pattern ADDITIONAL_COMMITTEE = Pattern.compile(
            "AND (THE )?(?<chamber>SENATE|ASSEMBLY) (STANDING COMMITTEE|TASK FORCE) ON (?<name>.+)");

    /**
     * Extracts PublicHearingCommittee's from the first page of a PublicHearingFile.
     *
     * @param firstPage the lines of the first page; a null list is treated as containing no
     *                  committee information.
     * @return the committees found, or null if the page contains no committee heading.
     * @throws IllegalStateException if a committee heading is detected but cannot be parsed.
     */
    public List<PublicHearingCommittee> parse(List<String> firstPage) {
        String committeeBlock = parseCommitteeBlock(firstPage);
        return parse(committeeBlock);
    }

    /**
     * Extracts PublicHearingCommittee's from a string containing the committee info.
     *
     * @param committeeBlock text containing the committee info; null is treated as containing
     *                       no committee information.
     * @return the committees found, or null if the text contains no committee heading.
     *         When exactly one committee is present the returned list is fixed-size.
     * @throws IllegalStateException if a committee heading is detected but cannot be parsed.
     */
    public List<PublicHearingCommittee> parse(String committeeBlock) {
        if (!committeeExists(committeeBlock)) {
            // Callers rely on null to signal "no committee information present".
            return null;
        }
        if (multipleCommittees(committeeBlock)) {
            return parseMultipleCommittees(committeeBlock);
        }
        return Arrays.asList(parseSingleCommittee(committeeBlock));
    }

    /**
     * Checks if any PublicHearingCommittee information is present in this committeeBlock.
     *
     * @param committeeBlock text to search for PublicHearingCommittee info; may be null.
     * @return true if the text is non-null and contains a committee heading.
     */
    private boolean committeeExists(String committeeBlock) {
        return committeeBlock != null && CHECK_FOR_COMMITTEES.matcher(committeeBlock).find();
    }

    /** Determines if a String contains info for multiple committees. */
    private boolean multipleCommittees(String committeeBlock) {
        return COMMITTEE_SPLIT.matcher(committeeBlock).find();
    }

    /**
     * Extract the PublicHearingCommittee's from a String when the String contains
     * multiple PublicHearingCommittee's.
     *
     * @param committeeBlock text containing more than one committee.
     * @return the committees in the order they appear in the text.
     * @throws IllegalStateException if any committee segment cannot be parsed.
     */
    private List<PublicHearingCommittee> parseMultipleCommittees(String committeeBlock) {
        List<PublicHearingCommittee> committees = new ArrayList<>();
        // Use the precompiled pattern directly rather than recompiling it from its string form.
        String[] committeeStrings = COMMITTEE_SPLIT.split(committeeBlock);

        // The first segment holds the "BEFORE THE NEW YORK STATE ..." committee.
        committees.add(parseSingleCommittee(committeeStrings[0]));

        // Remaining segments each start with "AND ...". A ", AND" separator yields an empty
        // segment between the consumed ", " and the zero-width match before "AND"; skip those.
        for (int i = 1; i < committeeStrings.length; i++) {
            if (!committeeStrings[i].isEmpty()) {
                committees.add(parseAdditionalCommittee(committeeStrings[i]));
            }
        }
        return committees;
    }

    /**
     * Extracts the nth PublicHearingCommittee from a String where n > 1.
     *
     * @param committeeString The String containing PublicHearingCommittee information.
     * @return the parsed committee, with its name trimmed of surrounding whitespace.
     * @throws IllegalStateException if the text does not match the additional committee format.
     */
    private PublicHearingCommittee parseAdditionalCommittee(String committeeString) {
        Matcher additionalCommitteeMatcher = ADDITIONAL_COMMITTEE.matcher(committeeString);
        if (!additionalCommitteeMatcher.find()) {
            throw new IllegalStateException(
                    "Unable to parse additional committee from: \"" + committeeString + "\"");
        }
        PublicHearingCommittee committee = new PublicHearingCommittee();
        // Trim so a name followed by another committee segment does not keep a trailing space,
        // matching how the first committee's name is handled.
        committee.setName(additionalCommitteeMatcher.group("name").trim());
        committee.setChamber(Chamber.valueOf(additionalCommitteeMatcher.group("chamber").toUpperCase()));
        return committee;
    }

    /**
     * Extracts the first PublicHearingCommittee from a String.
     *
     * @param committeeBlock text beginning with (or containing) the first committee heading.
     * @return the parsed committee, with its name trimmed of surrounding whitespace.
     * @throws IllegalStateException if the text does not contain a first committee heading.
     */
    private PublicHearingCommittee parseSingleCommittee(String committeeBlock) {
        Matcher matchFirstCommittee = FIRST_COMMITTEE.matcher(committeeBlock);
        if (!matchFirstCommittee.find()) {
            throw new IllegalStateException(
                    "Unable to parse committee from: \"" + committeeBlock + "\"");
        }
        PublicHearingCommittee committee = new PublicHearingCommittee();
        committee.setName(matchFirstCommittee.group("name").trim());
        committee.setChamber(Chamber.valueOf(matchFirstCommittee.group("chamber").toUpperCase()));
        return committee;
    }

    /**
     * Parses out the block of text containing committee info from the first page of the PublicHearing.
     * Lines are collected until the first separator line is reached; each line kept is passed
     * through PublicHearingTextUtils.stripLineNumber and prefixed with a single space.
     *
     * @param firstPage the lines of the first page; may be null.
     * @return the concatenated committee text, or an empty string if nothing was collected.
     */
    private String parseCommitteeBlock(List<String> firstPage) {
        StringBuilder committeeBlock = new StringBuilder();
        if (firstPage == null) {
            return committeeBlock.toString();
        }
        for (String line : firstPage) {
            if (line == null) {
                continue;
            }
            // Committee is the first piece of information on page; stop at the first divider.
            if (SEPARATOR.matcher(line).matches()) {
                break;
            }
            if (PublicHearingTextUtils.hasContent(line)) {
                committeeBlock.append(' ').append(PublicHearingTextUtils.stripLineNumber(line));
            }
        }
        return committeeBlock.toString();
    }
}