package gov.nysenate.openleg.service.spotcheck.billtext;
import gov.nysenate.openleg.client.view.bill.BillInfoView;
import gov.nysenate.openleg.model.bill.BaseBillId;
import gov.nysenate.openleg.model.bill.Bill;
import gov.nysenate.openleg.model.bill.BillAmendment;
import gov.nysenate.openleg.model.spotcheck.*;
import gov.nysenate.openleg.model.spotcheck.billtext.BillTextReference;
import gov.nysenate.openleg.service.bill.data.BillDataService;
import gov.nysenate.openleg.service.spotcheck.base.SpotCheckService;
import gov.nysenate.openleg.util.OutputUtils;
import org.apache.commons.lang3.NotImplementedException;
import org.apache.commons.lang3.StringUtils;
import org.apache.log4j.Logger;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import javax.annotation.PostConstruct;
import java.time.LocalDateTime;
import java.util.Arrays;
import java.util.regex.Pattern;
import static gov.nysenate.openleg.model.spotcheck.SpotCheckMismatchType.*;
/**
 * Spot check service that compares the text of a stored {@link Bill} against a
 * {@link BillTextReference} and records any differences as mismatches on a
 * {@link SpotCheckObservation}.
 *
 * Only {@link #check(Bill, BillTextReference)} is implemented; the overloads that would have to
 * look up the reference themselves throw {@link NotImplementedException}.
 *
 * Created by kyle on 2/19/15.
 */
@Service
public class BillTextCheckService implements SpotCheckService<BaseBillId, Bill, BillTextReference>{

    private static final Logger logger = Logger.getLogger(BillTextCheckService.class);

    /**
     * Matches every run of characters other than ASCII letters and digits
     * (i.e. non-word characters plus the underscore).
     */
    private static final Pattern NON_ALPHANUMERIC_PATTERN = Pattern.compile("[\\W_]+");

    /** Start of a line followed by a right-aligned one or two digit number (the printed line number). */
    private static final String LINE_NUMBER_REGEX = "(?:^( {4}\\d| {3}\\d\\d))";

    /**
     * A whole line made of seven spaces, an "A." or "S." print number with an optional "--X" amendment
     * suffix, a number, and optionally a second print number.
     * The chamber letter is matched with {@code [AS]}; a '|' inside a character class is a literal pipe,
     * not an alternation.
     */
    private static final String PAGE_MARKER_REGEX =
            "^ {7}[AS]\\. \\d+(--[A-Z])?[ ]+\\d+([ ]+[AS]\\. \\d+(--[A-Z])?)?$";

    /** A whole line of 42 or 43 spaces, a number, and a dash separated triple of numbers. */
    private static final String BUDGET_PAGE_MARKER_REGEX = "^[ ]{42,43}\\d+[ ]+\\d+-\\d+-\\d+$";

    /** First line of the "EXPLANATION" footer, including its trailing newline. */
    private static final String EXPLANATION_REGEX =
            "^[ ]+EXPLANATION--Matter in ITALICS \\(underscored\\) is new; matter in brackets\\n";

    /** Second line of the "EXPLANATION" footer together with the following "LBD" line. */
    private static final String EXPLANATION_REGEX_2 =
            "^[ ]+\\[ ] is old law to be omitted\\.\\n[ ]+LBD\\d+-\\d+-\\d+$";

    /**
     * Union of all layout-only patterns above, compiled once.
     * MULTILINE makes '^' and '$' anchor at every line of the text rather than only at its ends.
     */
    private static final Pattern NON_CONTENT_PATTERN = Pattern.compile(
            String.join("|", LINE_NUMBER_REGEX, PAGE_MARKER_REGEX, BUDGET_PAGE_MARKER_REGEX,
                    EXPLANATION_REGEX, EXPLANATION_REGEX_2),
            Pattern.MULTILINE);

    @Autowired
    BillDataService billDataService;

    /** Post-construction hook; currently there is nothing to initialize. */
    @PostConstruct
    public void init(){
    }

    /**
     * Not implemented: this service requires the reference to be supplied by the caller.
     *
     * @throws NotImplementedException always
     */
    @Override
    public SpotCheckObservation<BaseBillId> check(Bill content) throws ReferenceDataNotFoundEx {
        throw new NotImplementedException(":P");
    }

    /**
     * Not implemented: this service requires the reference to be supplied by the caller.
     *
     * @throws NotImplementedException always
     */
    @Override
    public SpotCheckObservation<BaseBillId> check(Bill content, LocalDateTime start, LocalDateTime end)
            throws ReferenceDataNotFoundEx {
        throw new NotImplementedException(":P");
    }

    /**
     * Compares the given bill against the given reference.
     *
     * If the reference is flagged as not found, a single REFERENCE_DATA_MISSING mismatch is recorded.
     * Otherwise the active amendment version is compared, and if the bill has the amendment that the
     * reference considers active, the full text of that amendment is compared as well.
     *
     * @param bill the bill to check
     * @param reference the reference data to check the bill against
     * @return an observation containing every mismatch that was found
     * @throws IllegalArgumentException if {@code bill} or {@code reference} is null
     */
    @Override
    public SpotCheckObservation<BaseBillId> check(Bill bill, BillTextReference reference) {
        if (reference == null) {
            throw new IllegalArgumentException("BillTextSpotcheckReference cannot be null when performing spot check");
        }
        if (bill == null) {
            throw new IllegalArgumentException("Bill cannot be null when performing spot check");
        }
        BaseBillId baseBillId = bill.getBaseBillId();
        SpotCheckReferenceId referenceId = reference.getReferenceId();
        final SpotCheckObservation<BaseBillId> observation = new SpotCheckObservation<>(referenceId, baseBillId);

        if (reference.isNotFound()) {
            observation.addMismatch(new SpotCheckMismatch(REFERENCE_DATA_MISSING, "", reference.getText()));
            return observation;
        }

        checkAmendment(bill, reference, observation);
        if (bill.hasAmendment(reference.getActiveVersion())) {
            BillAmendment amendment = bill.getAmendment(reference.getActiveVersion());
            checkBillText(amendment, reference, observation);
            // Todo find a better way of checking memo text
            //  The memo check (checkMemoText) is intentionally not invoked here:
            //  currently, memos are sent daily in batches and are not guaranteed to be present in sobi data
            //  if on lrs; also, memos are formatted a bit differently.
            //  When re-enabled it should only run for senate, non-resolution bills.
        }
        return observation;
    }

    /**
     * Records a BILL_ACTIVE_AMENDMENT mismatch when the bill has no active version or its active version
     * differs from the one in the reference.
     */
    private void checkAmendment(Bill bill, BillTextReference reference, SpotCheckObservation<BaseBillId> obsrv) {
        if (bill.getActiveVersion() == null || !bill.getActiveVersion().equals(reference.getActiveVersion())) {
            obsrv.addMismatch(new SpotCheckMismatch(BILL_ACTIVE_AMENDMENT,
                    bill.getActiveVersion(), reference.getActiveVersion()));
        }
    }

    /**
     * Compares the amendment text with the reference text after removing everything but letters and digits.
     * If that comparison fails, the texts are compared again with line numbers, page markers and the
     * explanation footer removed: a remaining difference is reported as BILL_TEXT_CONTENT, otherwise the
     * difference is attributed to layout and reported as BILL_TEXT_LINE_OFFSET.
     *
     * A null text on either side is treated as empty text so that a missing text is reported as a
     * mismatch instead of aborting the check with a NullPointerException.
     */
    private void checkBillText(BillAmendment billAmendment, BillTextReference reference, SpotCheckObservation<BaseBillId> obsrv){
        String dataText = StringUtils.defaultString(billAmendment.getFullText());
        String refText = StringUtils.defaultString(reference.getText());

        if (StringUtils.equals(stripNonAlpha(refText), stripNonAlpha(dataText))) {
            return;
        }
        // The loosely normalized texts differ; decide whether the difference is in content or only in layout
        if (StringUtils.equals(stripNonContent(refText), stripNonContent(dataText))) {
            obsrv.addMismatch(new SpotCheckMismatch(BILL_TEXT_LINE_OFFSET, dataText, refText));
        } else {
            obsrv.addMismatch(new SpotCheckMismatch(BILL_TEXT_CONTENT, dataText, refText));
        }
    }

    /**
     * Records a BILL_MEMO mismatch when the amendment memo and the reference memo differ, ignoring case.
     * Currently not called from {@link #check(Bill, BillTextReference)}.
     */
    private void checkMemoText(BillAmendment billAmendment, BillTextReference reference, SpotCheckObservation<BaseBillId> obsrv){
        String dataMemo = billAmendment.getMemo();
        String refMemo = reference.getMemo();
        if (!StringUtils.equalsIgnoreCase(dataMemo, refMemo)) {
            obsrv.addMismatch(new SpotCheckMismatch(BILL_MEMO, dataMemo, refMemo));
        }
    }

    /**
     * Removes every character that is not an ASCII letter or digit (whitespace, punctuation and
     * underscores included). A null text yields an empty string.
     */
    private String stripNonAlpha(String text) {
        if (text == null) {
            return "";
        }
        return NON_ALPHANUMERIC_PATTERN.matcher(text).replaceAll("");
    }

    /**
     * Removes line numbers, page markers and the explanation footer, then everything that is not an
     * ASCII letter or digit. A null text yields an empty string.
     */
    private String stripNonContent(String text) {
        if (text == null) {
            return "";
        }
        String stripped = NON_CONTENT_PATTERN.matcher(text).replaceAll("");
        return stripNonAlpha(stripped);
    }
}