package gov.loc.repository.bagit.conformance; import java.io.IOException; import java.io.InputStream; import java.nio.charset.Charset; import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Path; import java.util.AbstractMap.SimpleImmutableEntry; import java.util.Collection; import java.util.Collections; import java.util.HashSet; import java.util.List; import java.util.ResourceBundle; import java.util.Set; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.fasterxml.jackson.core.JsonParseException; import com.fasterxml.jackson.databind.JsonMappingException; import gov.loc.repository.bagit.conformance.profile.BagitProfile; import gov.loc.repository.bagit.domain.Bag; import gov.loc.repository.bagit.domain.Version; import gov.loc.repository.bagit.exceptions.InvalidBagMetadataException; import gov.loc.repository.bagit.exceptions.InvalidBagitFileFormatException; import gov.loc.repository.bagit.exceptions.UnparsableVersionException; import gov.loc.repository.bagit.exceptions.conformance.BagitVersionIsNotAcceptableException; import gov.loc.repository.bagit.exceptions.conformance.FetchFileNotAllowedException; import gov.loc.repository.bagit.exceptions.conformance.MetatdataValueIsNotAcceptableException; import gov.loc.repository.bagit.exceptions.conformance.RequiredManifestNotPresentException; import gov.loc.repository.bagit.exceptions.conformance.RequiredMetadataFieldNotPresentException; import gov.loc.repository.bagit.exceptions.conformance.RequiredTagFileNotPresentException; import gov.loc.repository.bagit.reader.BagitTextFileReader; import gov.loc.repository.bagit.reader.KeyValueReader; import gov.loc.repository.bagit.verify.BagVerifier; /** * Responsible for checking a bag and providing insight into how it cause problems. */ public final class BagLinter { private static final Logger logger = LoggerFactory.getLogger(BagLinter.class); private static final ResourceBundle messages = ResourceBundle.getBundle("MessageBundle"); private static final Version VERSION_1_0 = new Version(1,0); private BagLinter(){ // intentionally left empty } /** * Check a bag against a bagit-profile as described by * <a href="https://github.com/ruebot/bagit-profiles">https://github.com/ruebot/bagit-profiles</a> * <br>Note: <b> This implementation does not check the Serialization part of the profile!</b> * * @param jsonProfile the input stream to the json string describing the profile * @param bag the bag to check against the profile * * @throws IOException if there is a problem reading the profile * @throws JsonMappingException if there is a problem mapping the profile to the {@link BagitProfile} * @throws JsonParseException if there is a problem parsing the json while mapping to java object * * @throws FetchFileNotAllowedException if there is a fetch file when the profile prohibits it * @throws MetatdataValueIsNotAcceptableException if a metadata value is not in the list of acceptable values * @throws RequiredMetadataFieldNotPresentException if a metadata field is not present but it should be * @throws RequiredManifestNotPresentException if a payload or tag manifest type is not present but should be * @throws BagitVersionIsNotAcceptableException if the version of the bag is not in the list of acceptable versions * @throws RequiredTagFileNotPresentException if a tag file is not present but should be */ public static void checkAgainstProfile(final InputStream jsonProfile, final Bag bag) throws JsonParseException, JsonMappingException, IOException, FetchFileNotAllowedException, RequiredMetadataFieldNotPresentException, MetatdataValueIsNotAcceptableException, RequiredManifestNotPresentException, BagitVersionIsNotAcceptableException, RequiredTagFileNotPresentException{ BagProfileChecker.bagConformsToProfile(jsonProfile, bag); } /** * The BagIt specification is very flexible in what it allows which leads to situations * where something may be technically allowed, but should be discouraged. * This method checks a bag for potential problems, or other items that are allowed but discouraged. * This <strong>does not</strong> validate a bag. See {@link BagVerifier} instead. * * @param rootDir the root directory of the bag * * @return a set of {@link BagitWarning} detailing all items that should be fixed. * * @throws InvalidBagMetadataException if the bag metadata does not conform to the bagit specification * @throws UnparsableVersionException if there is an error reading the bagit version * @throws IOException if there was an error reading a file */ public static Set<BagitWarning> lintBag(final Path rootDir) throws IOException, UnparsableVersionException, InvalidBagMetadataException, InvalidBagitFileFormatException{ return lintBag(rootDir, Collections.emptyList()); } /** * The BagIt specification is very flexible in what it allows which leads to situations * where something may be technically allowed, but should be discouraged. * This method checks a bag for potential problems, or other items that are allowed but discouraged. * This <strong>does not</strong> validate a bag. See {@link BagVerifier} instead. * * @param rootDir the root directory of the bag * @param warningsToIgnore any {@link BagitWarning} to ignore when linting * * @return a set of {@link BagitWarning} detailing all items that should be fixed. * * @throws InvalidBagMetadataException if the bag metadata does not conform to the bagit specification * @throws UnparsableVersionException if there is an error reading the bagit version * @throws IOException if there was an error reading a file */ public static Set<BagitWarning> lintBag(final Path rootDir, final Collection<BagitWarning> warningsToIgnore) throws IOException, UnparsableVersionException, InvalidBagMetadataException, InvalidBagitFileFormatException{ final Set<BagitWarning> warnings = new HashSet<>(); //@Incubating Path bagitDir = rootDir.resolve(".bagit"); if(!Files.exists(bagitDir)){ bagitDir = rootDir; } final Path bagitFile = bagitDir.resolve("bagit.txt"); checkForExtraLines(bagitFile, warnings, warningsToIgnore); final SimpleImmutableEntry<Version, Charset> bagitInfo = BagitTextFileReader.readBagitTextFile(bagitFile); logger.info(messages.getString("checking_encoding_problems")); EncodingChecker.checkEncoding(bagitInfo.getValue(), warnings, warningsToIgnore); logger.info(messages.getString("checking_latest_version")); VersionChecker.checkVersion(bagitInfo.getKey(), warnings, warningsToIgnore); logger.info(messages.getString("checking_manifest_problems")); ManifestChecker.checkManifests(bagitDir, bagitInfo.getValue(), warnings, warningsToIgnore); logger.info(messages.getString("checking_metadata_problems")); MetadataChecker.checkBagMetadata(bagitDir, bagitInfo.getValue(), warnings, warningsToIgnore); return warnings; } private static void checkForExtraLines(final Path bagitFile, final Collection<BagitWarning> warnings, final Collection<BagitWarning> warningsToIgnore) throws InvalidBagMetadataException, IOException, UnparsableVersionException{ if(warningsToIgnore.contains(BagitWarning.EXTRA_LINES_IN_BAGIT_FILES)){ logger.debug(messages.getString("skipping_check_extra_lines")); return; } logger.debug(messages.getString("checking_extra_lines")); final List<SimpleImmutableEntry<String, String>> pairs = KeyValueReader.readKeyValuesFromFile(bagitFile, ":", StandardCharsets.UTF_8); for(final SimpleImmutableEntry<String, String> pair : pairs){ if("BagIt-Version".equals(pair.getKey())){ final Version version = BagitTextFileReader.parseVersion(pair.getValue()); //versions before 1.0 specified it must be exactly 2 lines if(pairs.size() > 2 && version.isOlder(VERSION_1_0)){ logger.warn(messages.getString("extra_lines_warning"), pairs.size()); warnings.add(BagitWarning.EXTRA_LINES_IN_BAGIT_FILES); } } } } }