package gov.nysenate.openleg.model.sobi;
import com.fasterxml.jackson.annotation.JsonIgnore;
import com.google.common.base.MoreObjects;
import org.apache.commons.io.FileUtils;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.text.ParseException;
import java.time.LocalDateTime;
import java.time.ZoneId;
/**
 * The SobiFile class wraps the sobi files sent from LBDC and retains some basic meta data.
 * SobiFiles can be broken down into SobiFragments which store data about the type of content in
 * the file and various processing related meta data.
 *
 * @see SobiFragment
 */
public class SobiFile
{
    /**
     * SOBI files are (mostly) in a CP850 or similar encoding. This was determined from the byte mapping of
     * paragraph/section characters to 244/245. This can't be 100% correct though because the degree symbol
     * must be 193 in the correct code set. See SOBI.D120612.T125850.TXT.
     */
    public static final String DEFAULT_ENCODING = "CP850";

    /** The format required for the SOBI file name. e.g. SOBI.D130323.T065432.TXT */
    private static final String SOBI_DATE_FULL_PATTERN = "'SOBI.D'yyMMdd'.T'HHmmss'.TXT'";

    /** Alternate format for SOBI files with no seconds specified in the filename. */
    private static final String SOBI_DATE_NO_SECS_PATTERN = "'SOBI.D'yyMMdd'.T'HHmm'.TXT'";

    /** Reference to the actual sobi file. */
    private File file;

    /** The encoding this file was written in. */
    private String encoding;

    /** The datetime when the SobiFile was recorded into the backing store. */
    private LocalDateTime stagedDateTime;

    /** Indicates if the underlying 'file' reference has been moved into an archive directory. */
    private boolean archived;

    /* --- Constructors --- */

    /**
     * Wraps the given file using {@link #DEFAULT_ENCODING}.
     *
     * @param sobiFile the sobi file on disk; must exist and have a parseable SOBI file name
     * @throws IOException see {@link #SobiFile(File, String)}
     * @throws SobiFileNotFoundEx see {@link #SobiFile(File, String)}
     */
    public SobiFile(File sobiFile) throws IOException, SobiFileNotFoundEx {
        this(sobiFile, DEFAULT_ENCODING);
    }

    /**
     * Wraps the given file, recording the encoding its text should be read with. The new
     * instance starts out as not archived and with no staged datetime.
     *
     * @param file the sobi file on disk
     * @param encoding name of the charset used when reading the file's text
     * @throws IOException specifically a {@link FileNotFoundException} carrying the absolute path
     *         when {@code file} does not exist
     * @throws SobiFileNotFoundEx declared for callers; not thrown directly by this constructor
     * @throws InvalidSobiNameEx if the file name matches neither of the supported SOBI name patterns
     */
    public SobiFile(File file, String encoding) throws IOException, SobiFileNotFoundEx {
        if (!file.exists()) {
            throw new FileNotFoundException(file.getAbsolutePath());
        }
        this.file = file;
        this.encoding = encoding;
        this.archived = false;
        // Validate the file name up front so an instance with an unparseable name is never created.
        // The private helper is used rather than the public getter so that no overridable method
        // runs while the object is still being constructed.
        parsePublishedDateTime(file.getName());
    }

    /* --- Functional Getters/Setters --- */

    /**
     * The file name serves as the unique identifier for the SobiFile.
     *
     * @return the name (without any directory component) of the underlying file
     */
    public String getFileName() {
        return this.file.getName();
    }

    /**
     * Retrieves the text contained within the file. The text is not saved due to the
     * added memory overhead when retaining references to SobiFiles, so every call reads
     * the file again using this SobiFile's encoding.
     *
     * @return the full contents of the file
     * @throws UnreadableSobiEx if the file could not be read; wraps the underlying IOException
     */
    @JsonIgnore
    public String getText() {
        try {
            return FileUtils.readFileToString(file, encoding);
        }
        catch (IOException e) {
            throw new UnreadableSobiEx(this, e);
        }
    }

    /**
     * The published datetime is determined solely via the file name, which must match either
     * the full pattern (e.g. SOBI.D130323.T065432.TXT) or the alternate pattern without seconds.
     * There is no fallback: a name that cannot be parsed results in an exception.
     *
     * @return the datetime encoded in the file name
     * @throws InvalidSobiNameEx if this sobi has a filename that cannot be parsed
     */
    public LocalDateTime getPublishedDateTime() throws InvalidSobiNameEx {
        return parsePublishedDateTime(this.getFileName());
    }

    /**
     * Parses the published datetime out of a SOBI file name, trying the full pattern first and
     * then the pattern without seconds.
     *
     * NOTE(review): DateUtils.parseDate is documented as lenient, so out-of-range fields
     * (e.g. month 13) appear to roll over instead of failing. Confirm whether such names
     * should be rejected (parseDateStrictly) before changing this.
     *
     * @param fileName the file name to parse
     * @return the parsed datetime, expressed in the system default time zone
     * @throws InvalidSobiNameEx if the name matches neither pattern; wraps the ParseException
     */
    private static LocalDateTime parsePublishedDateTime(String fileName) throws InvalidSobiNameEx {
        try {
            return LocalDateTime.ofInstant(
                org.apache.commons.lang3.time.DateUtils.parseDate(
                    fileName, SOBI_DATE_FULL_PATTERN, SOBI_DATE_NO_SECS_PATTERN)
                    .toInstant(),
                ZoneId.systemDefault());
        }
        catch (ParseException ex) {
            throw new InvalidSobiNameEx(fileName, ex);
        }
    }

    /* --- Override Methods --- */

    /**
     * @return a debug representation listing the file, encoding, staged datetime and archived flag
     */
    @Override
    public String toString() {
        return MoreObjects.toStringHelper(this)
            .add("file", file)
            .add("encoding", encoding)
            .add("stagedDateTime", stagedDateTime)
            .add("archived", archived)
            .toString();
    }

    /* --- Basic Getters/Setters --- */

    /** @return the reference to the underlying sobi file */
    public File getFile() {
        return file;
    }

    /**
     * Points this SobiFile at a different file reference. Unlike the constructors, this performs
     * no existence check and no file name validation, so {@link #getPublishedDateTime()} may
     * subsequently throw if the new file's name is not a valid SOBI name.
     *
     * @param file the new underlying file
     */
    public void setFile(File file) {
        this.file = file;
    }

    /** @return the name of the encoding used when reading this file's text */
    public String getEncoding() {
        return encoding;
    }

    /** @return the datetime when this SobiFile was recorded into the backing store, if set */
    public LocalDateTime getStagedDateTime() {
        return stagedDateTime;
    }

    /** @param stagedDateTime the datetime when this SobiFile was recorded into the backing store */
    public void setStagedDateTime(LocalDateTime stagedDateTime) {
        this.stagedDateTime = stagedDateTime;
    }

    /** @return true if the underlying file reference has been moved into an archive directory */
    public boolean isArchived() {
        return archived;
    }

    /** @param archived whether the underlying file has been moved into an archive directory */
    public void setArchived(boolean archived) {
        this.archived = archived;
    }
}