package gov.nysenate.openleg.processor.law;
import gov.nysenate.openleg.model.law.*;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import static java.util.stream.Collectors.toList;
/**
 * Base implementation of {@link LawBuilder} that holds the state shared by the concrete builders:
 * the law version being processed, the root of the law tree, the derived {@link LawInfo} and the
 * map of documents that need to be persisted.
 *
 * Subclasses decide how a block is placed in the hierarchy by implementing
 * {@link #determineHierarchy(LawBlock)}, {@link #addChildNode(LawTreeNode)},
 * {@link #isNodeListEmpty()} and {@link #clearParents()}.
 */
public abstract class AbstractLawBuilder implements LawBuilder
{
    private static final Logger logger = LoggerFactory.getLogger(AbstractLawBuilder.class);

    /** Pattern used for parsing the location ids to extract the document type and doc type id. */
    protected static Pattern locationPattern = Pattern.compile("^(ST|SP|SA|A|T|P|S|INDEX)(.*)");

    /** Pattern for certain chapter nodes that don't have the usual -CH pattern. */
    protected static Pattern specialChapterPattern = Pattern.compile("^(AS|ASSEMBLYRULES|SENATERULES)$");

    /** The location ids portions are prefixed with a code to indicate the different document types. */
    protected static Map<String, LawDocumentType> lawLevelCodes = new HashMap<>();
    static {
        lawLevelCodes.put("A", LawDocumentType.ARTICLE);
        lawLevelCodes.put("SA", LawDocumentType.SUBARTICLE);
        lawLevelCodes.put("T", LawDocumentType.TITLE);
        lawLevelCodes.put("ST", LawDocumentType.SUBTITLE);
        lawLevelCodes.put("P", LawDocumentType.PART);
        lawLevelCodes.put("SP", LawDocumentType.SUB_PART);
        lawLevelCodes.put("S", LawDocumentType.SECTION);
        lawLevelCodes.put("INDEX", LawDocumentType.INDEX);
    }

    /** A law version id that is obtained from the law blocks. */
    protected LawVersionId lawVersionId;

    /** The root node in the law tree. Null until a root document has been added. */
    protected LawTreeNode rootNode = null;

    /** Basic Chapter info. */
    protected LawInfo lawInfo;

    /** Map of all the documents that need to be persisted, keyed by document id. */
    protected Map<String, LawDocument> lawDocMap = new HashMap<>();

    /** A sequence number is used to maintain the order of the nodes. */
    protected int sequenceNo = 0;

    /** --- Constructors --- */

    /**
     * Creates a builder with no prior tree.
     *
     * @param lawVersionId LawVersionId
     */
    public AbstractLawBuilder(LawVersionId lawVersionId) {
        this.lawVersionId = lawVersionId;
    }

    /**
     * Creates a builder that starts from the root node and law info of a previously built tree.
     *
     * @param lawVersionId LawVersionId
     * @param previousTree LawTree - may be null, in which case the builder starts with no tree.
     */
    public AbstractLawBuilder(LawVersionId lawVersionId, LawTree previousTree) {
        this(lawVersionId);
        if (previousTree != null) {
            this.rootNode = previousTree.getRootNode();
            this.lawInfo = previousTree.getLawInfo();
        }
    }

    /** --- Abstract Methods --- */

    /**
     * The override of this method should be able to figure out which location id is the parent of the
     * given law document. The returned value is matched against {@link #locationPattern} to derive the
     * document type and doc type id.
     *
     * @param block LawBlock
     * @return String
     */
    protected abstract String determineHierarchy(LawBlock block);

    /**
     * Handles any behaviors relating to adding a new child to the hierarchy.
     *
     * @param node LawTreeNode
     */
    protected abstract void addChildNode(LawTreeNode node);

    /**
     * Checked by {@link #addDocument(LawDocument, boolean)} before a non-section document is attached;
     * a true result is treated as "no parent node has been added yet".
     *
     * @return boolean
     */
    protected abstract boolean isNodeListEmpty();

    /**
     * Invoked at the start of {@link #rebuildTree(String)} so that any parent state tracked by the
     * subclass is discarded before the tree is rebuilt.
     */
    protected abstract void clearParents();

    /** --- Methods --- */

    /**
     * {@inheritDoc}
     *
     * If no root node exists yet, the block either becomes the root (when it looks like a chapter doc)
     * or a synthetic root is created and the block is added beneath it. Non-root blocks are classified
     * as sections, as one of the types in {@link #lawLevelCodes}, or as MISC. The title is extracted last,
     * after the document type has been assigned.
     */
    public void addInitialBlock(LawBlock block, boolean isNewDoc) {
        final LawDocument lawDoc = new LawDocument(block);
        boolean isRootDoc = false;

        // For the initial law dumps, the first block that is processed for a law (usually) becomes the root node.
        if (rootNode == null) {
            logger.info("Processing root doc: {} for {} law.", lawDoc.getDocumentId(), lawDoc.getLawId());
            LawDocument chapterDoc;
            // If the block seems to be a chapter node, we'll treat this document as the root.
            if (isLikelyChapterDoc(lawDoc)) {
                lawDoc.setDocType(LawDocumentType.CHAPTER);
                lawDoc.setDocTypeId(lawDoc.getLocationId().replaceFirst("-CH", ""));
                chapterDoc = lawDoc;
                isRootDoc = true;
            }
            // Otherwise we have to create our own root node and process the current document as a child of it.
            else {
                chapterDoc = createRootDocument(block);
            }
            // A synthetic root has no meaningful chapter id, so an empty one is recorded in that case.
            lawInfo = deriveLawInfo(chapterDoc.getLawId(), (isRootDoc) ? chapterDoc.getDocTypeId() : "");
            addRootDocument(chapterDoc, isNewDoc);
        }

        // If this block is not a root doc,
        if (!isRootDoc) {
            // Section docs are easy, since their location ids are simply numbers and they do not have any children.
            if (isLikelySectionDoc(lawDoc)) {
                logger.debug("Processing section {}", lawDoc.getDocumentId());
                lawDoc.setDocType(LawDocumentType.SECTION);
                lawDoc.setDocTypeId(lawDoc.getLocationId());
                if (isNewDoc) {
                    lawDocMap.put(lawDoc.getDocumentId(), lawDoc);
                }
                addChildNode(new LawTreeNode(lawDoc, ++sequenceNo));
            }
            else {
                String specificLocId = determineHierarchy(block);
                Matcher locMatcher = locationPattern.matcher(specificLocId);
                if (locMatcher.matches()) {
                    lawDoc.setDocType(lawLevelCodes.get(locMatcher.group(1)));
                    lawDoc.setDocTypeId(locMatcher.group(2));
                }
                else {
                    logger.warn("Failed to parse the following location {}. Setting as MISC type.", lawDoc.getDocumentId());
                    lawDoc.setDocType(LawDocumentType.MISC);
                    lawDoc.setDocTypeId(block.getLocationId());
                }
                addDocument(lawDoc, isNewDoc);
            }
        }

        // Set the title for the document
        lawDoc.setTitle(LawTitleParser.extractTitle(lawDoc, lawDoc.getText()));
    }

    /**
     * {@inheritDoc}
     *
     * Dispatches on the block's method: "*MASTER*" rebuilds the tree, "*REPEAL*" sets the repealed date
     * on the matching node, "*DELETE*" looks the node up with the removal flag set, and an empty method
     * replaces the text of an existing document.
     *
     * @throws LawParseException if the method is not recognized, if a repeal, delete or update is
     *                           requested without a prior law tree, or if an updated document is not
     *                           part of the existing tree.
     */
    public void addUpdateBlock(LawBlock block) {
        final String method = block.getMethod();
        final String documentId = block.getDocumentId();

        // Rebuild the law tree
        if (method.equals("*MASTER*")) {
            rebuildTree(block.getText().toString());
        }
        // Repeal the document
        else if (method.equals("*REPEAL*")) {
            requireRootNode("repeal", documentId);
            logger.info("{} , {}", documentId, rootNode);
            Optional<LawTreeNode> node = rootNode.findNode(documentId, false);
            if (node.isPresent()) {
                logger.info("Repealing {}", documentId);
                node.get().setRepealedDate(block.getPublishedDate());
            }
            else {
                logger.warn("Failed to repeal document {} because it could not be located within the law tree!",
                            documentId);
            }
        }
        // Delete the document
        else if (method.equals("*DELETE*")) {
            requireRootNode("delete", documentId);
            logger.info("Deleting {}", documentId);
            // The second argument is passed as true only here; presumably it removes the located node - confirm.
            Optional<LawTreeNode> node = rootNode.findNode(documentId, true);
            if (!node.isPresent()) {
                logger.warn("Failed to delete document {} because it could not be located within the law tree!",
                            documentId);
            }
        }
        // Update the document
        else if (method.isEmpty()) {
            requireRootNode("add", documentId);
            Optional<LawDocInfo> existingDocInfo = rootNode.find(documentId);
            if (!existingDocInfo.isPresent()) {
                throw new LawParseException("Can't add law document " + documentId +
                                            " without a prior law tree structure including it.");
            }
            final String updatedText = block.getText().toString();
            existingDocInfo.get().setPublishedDate(block.getPublishedDate());
            LawDocument lawDoc = new LawDocument(existingDocInfo.get(), updatedText);
            // Re-parse the titles
            lawDoc.setTitle(LawTitleParser.extractTitle(lawDoc, updatedText));
            lawDocMap.put(lawDoc.getDocumentId(), lawDoc);
            logger.info("Updated {}", lawDoc.getDocumentId());
        }
        else {
            throw new LawParseException("Don't know how to handle law block updates with method: " + method);
        }
    }

    /**
     * {@inheritDoc}
     *
     * The current root is set aside and each document id listed in the master document is re-added
     * through {@link #addInitialBlock(LawBlock, boolean)}. The published date of each entry is taken
     * from an already processed document, then from the prior tree, and otherwise from the law version;
     * only entries found in neither place are flagged as new documents.
     */
    public void rebuildTree(String masterDoc) {
        LawTreeNode priorRootNode = this.rootNode;
        this.rootNode = null;
        logger.info("Rebuilding tree for {} with master document.", this.lawVersionId.getLawId());
        // Clear out any existing parents when rebuilding trees.
        clearParents();
        // NOTE(review): StringUtils.split treats its second argument as a set of separator characters,
        // so this splits on '\' and on 'n' individually rather than on a line break. Presumably the
        // master document is delimited by a literal backslash-n and doc ids never contain either
        // character - confirm before changing.
        for (String docId : StringUtils.split(masterDoc, "\\n")) {
            // Apply doc id replacements if necessary
            final String resolvedDocId = LawDocIdFixer.applyReplacement(docId, this.lawVersionId.getPublishedDate());
            // The first three characters of a document id are the law id, the remainder is the location id.
            LawBlock block = new LawBlock();
            block.setDocumentId(resolvedDocId);
            block.setLawId(resolvedDocId.substring(0, 3));
            block.setLocationId(resolvedDocId.substring(3));

            // Use published date from existing law doc if present
            if (lawDocMap.containsKey(resolvedDocId)) {
                block.setPublishedDate(lawDocMap.get(resolvedDocId).getPublishedDate());
                logger.debug("Processed law doc id found for {} with published date {}", resolvedDocId, block.getPublishedDate());
                addInitialBlock(block, false);
                continue;
            }
            // Or from the previous tree node if set
            if (priorRootNode != null) {
                Optional<LawDocInfo> existingDocInfo = priorRootNode.find(resolvedDocId);
                if (existingDocInfo.isPresent()) {
                    block.setPublishedDate(existingDocInfo.get().getPublishedDate());
                    addInitialBlock(block, false);
                    logger.debug("Found existing law with doc id {} with published date {}",
                                 block.getDocumentId(), block.getPublishedDate());
                    continue;
                }
            }
            logger.info("New document id found in master document: {}", resolvedDocId);
            block.setPublishedDate(this.lawVersionId.getPublishedDate());
            addInitialBlock(block, true);
        }
    }

    /**
     * {@inheritDoc}
     */
    public LawTree getProcessedLawTree() {
        return new LawTree(lawVersionId, rootNode, lawInfo);
    }

    /**
     * {@inheritDoc}
     */
    public List<LawDocument> getProcessedLawDocuments() {
        return lawDocMap.values().stream().collect(toList());
    }

    /**
     * Add the root document which does not have to be associated with a parent. The sequence number is
     * reset so that the root node always receives sequence number 1.
     *
     * @param rootDoc LawDocument
     * @param isNewDoc boolean - Set to true if this is a new document and should be persisted.
     * @throws IllegalArgumentException if rootDoc is null.
     */
    protected void addRootDocument(LawDocument rootDoc, boolean isNewDoc) {
        if (rootDoc == null) throw new IllegalArgumentException("Root document cannot be null!");
        sequenceNo = 0;
        rootNode = new LawTreeNode(rootDoc, ++sequenceNo);
        if (isNewDoc) {
            lawDocMap.put(rootDoc.getDocumentId(), rootDoc);
        }
        addChildNode(this.rootNode);
    }

    /**
     * Add the document by associating it as a child of the current parent node and subsequently setting the
     * current parent node to point to this document.
     *
     * @param lawDoc LawDocument
     * @param isNewDoc boolean - Set to true if this is a new document and should be persisted.
     * @throws IllegalStateException if {@link #isNodeListEmpty()} reports that no parent has been added.
     */
    protected void addDocument(LawDocument lawDoc, boolean isNewDoc) {
        if (isNodeListEmpty()) {
            throw new IllegalStateException("Failed to add node because it's parent node was not added!");
        }
        if (isNewDoc) {
            lawDocMap.put(lawDoc.getDocumentId(), lawDoc);
        }
        LawTreeNode node = new LawTreeNode(lawDoc, ++sequenceNo);
        addChildNode(node);
    }

    /**
     * Constructs the LawInfo based on the LawChapterType mapping. When the law id is not a known
     * {@link LawChapterCode}, the name is left empty and the type is set to MISC.
     *
     * @param lawId String
     * @param chapterId String
     * @return LawInfo
     */
    protected LawInfo deriveLawInfo(String lawId, String chapterId) {
        LawInfo chapter = new LawInfo();
        chapter.setLawId(lawId);
        chapter.setChapterId(chapterId);
        try {
            LawChapterCode chapterType = LawChapterCode.valueOf(lawId);
            chapter.setName(chapterType.getName());
            chapter.setType(chapterType.getType());
        }
        catch (IllegalArgumentException ex) {
            // valueOf found no matching constant; fall back to an unnamed MISC law.
            chapter.setName("");
            chapter.setType(LawType.MISC);
        }
        return chapter;
    }

    /**
     * Indicates if the document is potentially a chapter node. Consolidated laws will typically begin with a -CH
     * which is not a problem, but some unconsolidated laws have the year or in some cases start right with the
     * section or article. We're checking to make sure those cases do not exist for this block.
     *
     * @param doc LawDocument
     * @return boolean
     */
    protected boolean isLikelyChapterDoc(LawDocument doc) {
        final String locationId = doc.getLocationId();
        if (locationId.startsWith("-CH") || specialChapterPattern.matcher(locationId).matches()) {
            return true;
        }
        // Anything that is neither section "1" nor a recognized level code is assumed to be the chapter.
        return !locationId.equals("1") && !locationPattern.matcher(locationId).matches();
    }

    /**
     * Section documents typically just have a location id with the number of the section (except in the constitution).
     * All other document types start with a character or symbol. An empty location id is never a section.
     *
     * @param lawDoc LawDocument
     * @return boolean - true if this block is most likely a section
     */
    protected boolean isLikelySectionDoc(LawDocument lawDoc) {
        final String locationId = lawDoc.getLocationId();
        // Guard against an empty location id (e.g. a three character doc id), where charAt(0) would throw.
        return !locationId.isEmpty() && Character.isDigit(locationId.charAt(0));
    }

    /**
     * Create our own root law doc to serve as the root document in the event that we don't receive a top level doc
     * from the dumps. This is common for unconsolidated laws where they just start with the first section or article.
     *
     * @param block LawBlock - supplies the law id and published date for the synthetic root.
     * @return LawDocument - a CHAPTER document with location id "-ROOT" and empty text.
     */
    protected LawDocument createRootDocument(LawBlock block) {
        LawDocument dummyParent = new LawDocument();
        dummyParent.setLawId(block.getLawId());
        dummyParent.setDocumentId(block.getLawId() + "-ROOT");
        dummyParent.setLocationId("-ROOT");
        dummyParent.setDocType(LawDocumentType.CHAPTER);
        dummyParent.setDocTypeId("ROOT");
        dummyParent.setPublishedDate(block.getPublishedDate());
        dummyParent.setText("");
        dummyParent.setTitle(LawTitleParser.extractTitleFromChapter(dummyParent));
        return dummyParent;
    }

    /**
     * Ensures a law tree exists before an update operation dereferences the root node.
     *
     * @param action String - verb describing the attempted operation, used in the error message.
     * @param documentId String - id of the document the operation targets.
     * @throws LawParseException if there is no root node.
     */
    private void requireRootNode(String action, String documentId) {
        if (rootNode == null) {
            throw new LawParseException("Can't " + action + " law document " + documentId + " without a prior law tree.");
        }
    }
}