package gov.loc.repository.bagit.reader;

import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.List;
import java.util.ResourceBundle;
import java.util.AbstractMap.SimpleImmutableEntry;
import java.util.Arrays;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.slf4j.helpers.MessageFormatter;

import gov.loc.repository.bagit.domain.Version;
import gov.loc.repository.bagit.exceptions.InvalidBagMetadataException;
import gov.loc.repository.bagit.exceptions.InvalidBagitFileFormatException;
import gov.loc.repository.bagit.exceptions.UnparsableVersionException;

/**
 * This class is responsible for reading and parsing bagit.txt files from the filesystem
 */
public final class BagitTextFileReader {
  private static final Logger logger = LoggerFactory.getLogger(BagitTextFileReader.class);
  /** The UTF-8 byte order mark; compared against the first bytes of the bagit.txt file. */
  private static final byte[] BOM = new byte[]{(byte)0xEF, (byte)0xBB, (byte)0xBF};
  private static final ResourceBundle messages = ResourceBundle.getBundle("MessageBundle");

  private BagitTextFileReader(){
    //intentionally left empty
  }

  /**
   * Read the bagit.txt file and return the version and encoding.
   * <p>
   * The file is always read as UTF-8. If a key appears more than once the last occurrence wins.
   * An encoding name that is malformed or unsupported is not translated: the unchecked exception
   * thrown by {@link Charset#forName(String)} propagates to the caller.
   *
   * @param bagitFile the bagit.txt file
   * @return the bag {@link Version} and {@link Charset} encoding of the tag files
   *
   * @throws IOException if there is a problem reading a file
   * @throws UnparsableVersionException if there is a problem parsing the bagit version number
   * @throws InvalidBagMetadataException if the bagit.txt file does not conform to "key: value"
   * @throws InvalidBagitFileFormatException if the bagit.txt file does not conform to the bagit spec
   */
  public static SimpleImmutableEntry<Version, Charset> readBagitTextFile(final Path bagitFile)
      throws IOException, UnparsableVersionException, InvalidBagMetadataException, InvalidBagitFileFormatException{
    logger.debug(messages.getString("reading_version_and_encoding"), bagitFile);
    throwErrorIfByteOrderMarkIsPresent(bagitFile);
    final List<SimpleImmutableEntry<String, String>> pairs =
        KeyValueReader.readKeyValuesFromFile(bagitFile, ":", StandardCharsets.UTF_8);

    String version = null;
    Charset encoding = null;
    for(final SimpleImmutableEntry<String, String> pair : pairs){
      if("BagIt-Version".equals(pair.getKey())){
        version = pair.getValue();
        logger.debug(messages.getString("bagit_version"), version);
      }
      if("Tag-File-Character-Encoding".equals(pair.getKey())){
        encoding = Charset.forName(pair.getValue());
        logger.debug(messages.getString("tag_file_encoding"), encoding);
      }
    }

    // both entries are mandatory; a file missing either one is rejected
    if(version == null || encoding == null){
      throw new InvalidBagitFileFormatException(messages.getString("invalid_bagit_text_file_error"));
    }

    return new SimpleImmutableEntry<>(parseVersion(version), encoding);
  }

  /*
   * As per the specification, a BOM is not allowed in the bagit.txt file
   */
  private static void throwErrorIfByteOrderMarkIsPresent(final Path bagitFile) throws IOException, InvalidBagitFileFormatException{
    // Only the leading bytes matter, so read at most BOM.length bytes instead of the whole file.
    // A file shorter than the BOM leaves the remaining slots at zero, which can never match.
    final byte[] firstFewBytesInFile = new byte[BOM.length];
    try(InputStream in = Files.newInputStream(bagitFile)){
      int filled = 0;
      while(filled < BOM.length){
        final int read = in.read(firstFewBytesInFile, filled, BOM.length - filled);
        if(read < 0){
          break; // end of file reached before BOM.length bytes were available
        }
        filled += read;
      }
    }

    if(Arrays.equals(BOM, firstFewBytesInFile)){
      final String messageTemplate = messages.getString("bom_present_error");
      throw new InvalidBagitFileFormatException(MessageFormatter.format(messageTemplate, bagitFile).getMessage());
    }
  }

  /**
   * Parses the version string into a {@link Version} object.
   * <p>
   * The string must contain at least two dot separated integers, for example {@code "0.97"}.
   * Only the first two components are used; any further components are ignored.
   *
   * @param version the version string, in the form {@code major.minor}
   * @return the parsed {@link Version}
   *
   * @throws UnparsableVersionException if the string has no ".", is missing the major or minor
   *         component, or either component is not a valid integer
   */
  public static Version parseVersion(final String version) throws UnparsableVersionException{
    if(!version.contains(".")){
      throw new UnparsableVersionException(messages.getString("unparsable_version_error"), version);
    }

    final String[] parts = version.split("\\.");
    // split() drops trailing empty strings, so "1." yields a single part and "." yields none
    if(parts.length < 2){
      throw new UnparsableVersionException(messages.getString("unparsable_version_error"), version);
    }

    final int major;
    final int minor;
    try{
      major = Integer.parseInt(parts[0]);
      minor = Integer.parseInt(parts[1]);
    }
    catch(NumberFormatException e){
      // Report through the declared checked exception rather than leaking an unchecked one.
      // The offending version string is carried by the exception itself.
      throw new UnparsableVersionException(messages.getString("unparsable_version_error"), version);
    }

    return new Version(major, minor);
  }
}