package gov.nysenate.openleg.processor.transcript;
import gov.nysenate.openleg.model.transcript.Transcript;
import gov.nysenate.openleg.model.transcript.TranscriptFile;
import gov.nysenate.openleg.model.transcript.TranscriptId;
import gov.nysenate.openleg.service.transcript.data.TranscriptDataService;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
@Service
public class TranscriptParser
{
private static final Logger logger = LoggerFactory.getLogger(TranscriptParser.class);
private static final String TRANSCRIPT_ENCODING = "latin1";
@Autowired
private TranscriptDataService transcriptDataService;
public void process(TranscriptFile transcriptFile) throws IOException {
String sessionType = null;
String location = null;
String date = null;
String time = null;
int numSkipped = 0;
StringBuilder transcriptText = new StringBuilder();
boolean firstPageParsed = false;
boolean firstLineParsed = false;
boolean skipFirstThreeLines = false;
String lineText;
BufferedReader reader = new BufferedReader(new InputStreamReader(
new FileInputStream(transcriptFile.getFile()), TRANSCRIPT_ENCODING));
while ((lineText = reader.readLine()) != null) {
TranscriptLine line = new TranscriptLine(lineText);
if (!firstPageParsed) {
// Handle transcripts with 3 incorrect lines at start of transcript.
if (!firstLineParsed) {
if (lineText.contains("SESSION")) {
skipFirstThreeLines = true;
numSkipped = 1;
continue;
}
}
// Continue skipping lines 2 and 3 if first 3 lines are incorrect.
if (skipFirstThreeLines && numSkipped <= 3) {
numSkipped++;
continue;
}
if (line.isLocation())
location = line.getLocation();
if (line.isDate())
date = line.getDateString();
if (line.isTime())
time = line.getTimeString();
if (line.isSession())
sessionType = line.removeLineNumber().trim();
firstPageParsed = areWeDoneWithFirstPage(sessionType, location, date, time);
}
firstLineParsed = true;
transcriptText.append(line.fullText()).append("\n");
}
reader.close();
DateTimeFormatter dtf = DateTimeFormatter.ofPattern("MMMM d yyyy hmma");
LocalDateTime dateTime = LocalDateTime.parse(date + " " + time, dtf);
TranscriptId transcriptId = new TranscriptId(transcriptFile.getFileName());
Transcript transcript = new Transcript(transcriptId, sessionType, dateTime, location, transcriptText.toString());
transcriptDataService.saveTranscript(transcript, transcriptFile, true);
}
private boolean areWeDoneWithFirstPage(String sessionType, String location, String date, String time) {
return sessionType != null && location != null && date != null && time !=null;
}
}