package gov.nysenate.openleg.service.source;
import com.google.common.collect.ImmutableMap;
import gov.nysenate.openleg.model.base.Version;
import gov.nysenate.openleg.model.bill.BaseBillId;
import gov.nysenate.openleg.model.bill.BillId;
import gov.nysenate.openleg.model.spotcheck.billtext.BillTextReference;
import gov.nysenate.openleg.processor.base.ParseError;
import gov.nysenate.openleg.service.scraping.BillTextScraper;
import gov.nysenate.openleg.service.scraping.ScrapedBillTextParser;
import gov.nysenate.openleg.util.FileIOUtils;
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.text.StrSubstitutor;
import org.apache.http.HttpResponse;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import java.io.File;
import java.io.IOException;
import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.stream.Collectors;
/**
 * Generates "patch" SOBI files containing bill text that was scraped from LRS.
 *
 * The bills are first downloaded as html files into a scratch directory, parsed into
 * {@link BillTextReference}s, formatted into sobi bill text lines, and finally written out
 * as a single sobi file named after the current date and time.
 */
@Service
public class LRSBillTextSobiMaker {

    private static final Logger logger = LoggerFactory.getLogger(LRSBillTextSobiMaker.class);

    /** Skeleton of the generated sobi document; ${...} tokens are filled in by {@link #writeSobi}. */
    private static final String sobiDocTemplate =
            "<?xml version='1.0' encoding='UTF-8'?>\n" +
            "<PATCH>\n" +
            "Copied LRS bill text for ${billIds}\n" +
            "</PATCH>\n" +
            "<DATAPROCESS TIME=\"${pubDateTime}\">\n" +
            "${data}" +
            "</DATAPROCESS>\n" +
            "<SENATEDATA TIME=\"${pubDateTime}\">\n" +
            "No data to process on ${pubDate} at ${pubTime}\n" +
            "</SENATEDATA>";

    /** File name pattern of the generated sobi file. */
    private static final DateTimeFormatter sobiFileNameFormat =
            DateTimeFormatter.ofPattern("'SOBI.D'yyMMdd.'T'HHmmss.'TXT'");

    private static final DateTimeFormatter pubDateTimeFormat = DateTimeFormatter.ofPattern("yyyy-MM-dd-HH.mm.ss");

    // 'MM' is month-of-year. The lowercase 'mm' used previously is minute-of-hour, which
    // put the minute into the month position of the printed date.
    private static final DateTimeFormatter pubDateFormat = DateTimeFormatter.ofPattern("dd/MM/yyyy");

    private static final DateTimeFormatter pubTimeFormat = DateTimeFormatter.ofPattern("HH:mm:ss");

    /** Width that the print number portion of a bill text header is padded to. */
    private static final int HEADER_PRINT_NO_WIDTH = 16;

    /** Number of output lines (headers included) between two repeated bill text headers. */
    private static final int LINES_PER_HEADER = 100;

    /** Shared layout of the opening and closing bill text header; %s receives the marker. */
    private static final String billTextHeaderTemplateTemplate =
            "00000.SO DOC ${printNo}%s BTXT ${year}";

    private static final String billTextHeaderTemplate = String.format(billTextHeaderTemplateTemplate, " ");

    private static final String billTextCloserTemplate = String.format(billTextHeaderTemplateTemplate, "*END* ");

    @Autowired BillTextScraper billTextScraper;
    @Autowired ScrapedBillTextParser billTextParser;

    /** Scratch directory that scraped html files are saved to before being parsed. */
    private File scrapedDir = new File("/tmp/scraped-bills");

    /**
     * Attempts to scrape bill text for the given bill ids.
     * Formats the scraped bill texts into a sobi file written to the result dir.
     * An {@link IOException} raised along the way is logged and not rethrown.
     *
     * @param billIds {@code Collection<BaseBillId>} - bill ids to be scraped
     * @param resultDir File - the directory to save the generated sobi
     */
    public void makeSobi(Collection<BaseBillId> billIds, File resultDir) {
        try {
            scrapeBills(billIds);
            List<BillTextReference> btrs = parseBills();
            StringBuilder dataBuilder = new StringBuilder();
            for (BillTextReference btr : btrs) {
                addBillText(btr, dataBuilder);
            }
            List<BillId> parsedBillIds = btrs.stream()
                    .map(BillTextReference::getBillId)
                    .collect(Collectors.toList());
            writeSobi(dataBuilder, resultDir, parsedBillIds);
        } catch (IOException ex) {
            // The throwable is passed as the last argument with no matching placeholder,
            // so that slf4j logs it together with its stack trace.
            logger.error("Error while generating sobis", ex);
        }
    }

    /**
     * Appends sobi formatted bill text from the given bill text reference to the given string builder.
     *
     * Every emitted line starts with a prefix built from the session year, bill type, number and
     * version. A header line is inserted before the first text line and again whenever the running
     * line count (headers included) reaches a multiple of {@value #LINES_PER_HEADER}; a closing
     * header is appended after the last text line.
     *
     * @param btr BillTextReference - source of the bill id, session year and text
     * @param dataBuilder StringBuilder - receives the formatted lines
     */
    private void addBillText(BillTextReference btr, StringBuilder dataBuilder) {
        logger.info("formatting {}", btr.getBillId());
        BillId billId = btr.getBillId();

        // Format the print no portion of the header, right padded with spaces to a fixed width
        String headerPrintNo = StringUtils.rightPad(
                String.format("%s %d%s", billId.getBillType(), billId.getNumber(), billId.getVersion()),
                HEADER_PRINT_NO_WIDTH);

        // Format the header
        String yearString = Integer.toString(btr.getSessionYear());
        ImmutableMap<String, String> headerValues =
                ImmutableMap.of("printNo", headerPrintNo, "year", yearString);
        String header = StrSubstitutor.replace(billTextHeaderTemplate, headerValues);

        // Format the line start for this bill; the default version is written as a single space
        String lineStart = String.format("%s%s%05d%sT", yearString, billId.getBillType(), billId.getNumber(),
                billId.getVersion() != Version.DEFAULT ? billId.getVersion() : " ");

        int textLine = 1;   // Tracks text line numbers
        int totalLine = 0;  // Tracks total lines that have been added including headers
        for (String line : btr.getText().split("\n")) {
            dataBuilder.append(lineStart);
            if (totalLine % LINES_PER_HEADER == 0) {
                // Finish the current line as a header, then start a new line for the text
                dataBuilder.append(header)
                        .append("\n")
                        .append(lineStart);
                totalLine++;
            }
            // Append line data
            dataBuilder.append(String.format("%05d", textLine))
                    .append(line)
                    .append("\n");
            textLine++;
            totalLine++;
        }

        // Add a closing header
        dataBuilder.append(lineStart)
                .append(StrSubstitutor.replace(billTextCloserTemplate, headerValues))
                .append("\n");
    }

    /**
     * Downloads LRS html bill files for the given bill ids into the scraped directory.
     *
     * @param billIds {@code Collection<BaseBillId>} - bill ids to download
     * @throws IOException if the scraped directory cannot be created, or if propagated by the scraper
     */
    private void scrapeBills(Collection<BaseBillId> billIds) throws IOException {
        FileUtils.forceMkdir(scrapedDir);
        for (BaseBillId billId : billIds) {
            logger.info("scraping {}", billId);
            String url = billTextScraper.constructUrl(billId);
            HttpResponse res = billTextScraper.makeRequest(url);
            billTextScraper.saveResponseToFile(res, billTextScraper.getSaveFile(scrapedDir, billId));
        }
    }

    /**
     * Parses all bill html files in the scraped directory into BillTextReferences.
     * Successfully parsed files are deleted; a file that fails to parse is logged, skipped
     * and left in place.
     *
     * @return {@code List<BillTextReference>} - one reference per successfully parsed file
     * @throws IOException if listing or parsing the scraped files fails
     */
    private List<BillTextReference> parseBills() throws IOException {
        Collection<File> scrapedBills = FileIOUtils.safeListFiles(scrapedDir, false, new String[]{});
        List<BillTextReference> btrs = new ArrayList<>();
        for (File scrapedFile : scrapedBills) {
            try {
                logger.info("parsing {}", scrapedFile);
                btrs.add(billTextParser.parseReference(scrapedFile));
                // A file that stays behind would be parsed again on the next run, so report it
                if (!scrapedFile.delete()) {
                    logger.warn("could not delete scraped bill file {}", scrapedFile);
                }
            } catch (ParseError ex) {
                // Trailing throwable has no placeholder so slf4j logs it with its stack trace
                logger.error("error parsing scraped bill file {}", scrapedFile, ex);
            }
        }
        return btrs;
    }

    /**
     * Formats the given text data into a sobi file format and saves it to the destination dir.
     * The file name and the timestamps inside the file are derived from the current local time.
     *
     * @param data StringBuilder - formatted bill text lines placed in the DATAPROCESS section
     * @param destinationDir File - directory to write to; created if it does not exist
     * @param billIds {@code Collection<BillId>} - bill ids listed in the PATCH section
     * @throws IOException if the directory cannot be created or the file cannot be written
     */
    private void writeSobi(StringBuilder data, File destinationDir, Collection<BillId> billIds) throws IOException {
        LocalDateTime pubDateTime = LocalDateTime.now();
        String fileContents = StrSubstitutor.replace(sobiDocTemplate,
                ImmutableMap.of(
                        "data", data.toString(),
                        "pubDateTime", pubDateTime.format(pubDateTimeFormat),
                        "pubDate", pubDateTime.format(pubDateFormat),
                        "pubTime", pubDateTime.format(pubTimeFormat),
                        "billIds", StringUtils.join(billIds, ", ")));
        FileUtils.forceMkdir(destinationDir);
        File sobiFile = new File(destinationDir, pubDateTime.format(sobiFileNameFormat));
        logger.info("writing {}", sobiFile);
        FileUtils.write(sobiFile, fileContents, "UTF-8");
    }
}