package gov.nysenate.openleg.service.scraping; import gov.nysenate.openleg.util.StringDiffer; import org.springframework.stereotype.Repository; import javax.annotation.PostConstruct; import java.io.File; import java.io.IOException; import java.util.LinkedList; import java.util.Scanner; /** * Created by kyle on 3/12/15. */ @Repository public class ScrapedBillMemoParser { private String memoText = ""; private String amendment = ""; @PostConstruct public void init(){ } /** * * @param file * @return the text contained in the memo * @throws IOException */ public String getBillMemoText(File file) throws IOException{ Scanner scMemo = new Scanner(file); //check the first line for an amendment at the end. Regardless, it adds it to the memotext if (scMemo.hasNextLine()){ String amend = scMemo.nextLine(); memoText = memoText.concat(amend); amend = amend.substring(amend.length()-1); if (amend.matches("\\p{Upper}")){ this.amendment = amend; } } while (scMemo.hasNextLine()) { memoText = memoText.concat(scMemo.nextLine()); } scMemo.close(); parseBillMemo(memoText); return memoText; } public String getAmendment() throws IOException{ return this.amendment; } /** * * @param text * @throws Exception */ public void parseBillMemo(String text) throws IOException{ text = text.replaceAll("\\s", ""); text = text.replaceAll("\"", "\\\\\""); text = text.replaceAll("-", ""); text = text.replaceAll("\u00A0", ""); text = text.replaceFirst("SPONSOR: \\S+", ""); text = text.replaceAll("§", "§"); //strange memo character change } }