/**
 * JHOVE2 - Next-generation architecture for format-aware characterization
 *
 * Copyright (c) 2009 by The Regents of the University of California,
 * Ithaka Harbors, Inc., and The Board of Trustees of the Leland Stanford
 * Junior University.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * o Redistributions of source code must retain the above copyright notice,
 *   this list of conditions and the following disclaimer.
 *
 * o Redistributions in binary form must reproduce the above copyright notice,
 *   this list of conditions and the following disclaimer in the documentation
 *   and/or other materials provided with the distribution.
 *
 * o Neither the name of the University of California/California Digital
 *   Library, Ithaka Harbors/Portico, or Stanford University, nor the names of
 *   its contributors may be used to endorse or promote products derived from
 *   this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

package org.jhove2.module.format.warc;

import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteOrder;
import java.util.Collections;
import java.util.Date;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Map.Entry;
import java.util.TreeMap;

import org.jhove2.annotation.ReportableProperty;
import org.jhove2.core.I8R;
import org.jhove2.core.JHOVE2;
import org.jhove2.core.JHOVE2Exception;
import org.jhove2.core.Message;
import org.jhove2.core.Message.Severity;
import org.jhove2.core.format.Format;
import org.jhove2.core.format.FormatIdentification;
import org.jhove2.core.format.FormatIdentification.Confidence;
import org.jhove2.core.io.Input;
import org.jhove2.core.reportable.Reportable;
import org.jhove2.core.source.Source;
import org.jhove2.core.source.SourceFactory;
import org.jhove2.module.Module;
import org.jhove2.module.format.BaseFormatModule;
import org.jhove2.module.format.Validator;
import org.jhove2.module.format.gzip.GzipModule;
import org.jhove2.module.format.gzip.GzipModule.GZipOffsetProperty;
import org.jhove2.module.format.warc.properties.WarcRecordData;
import org.jhove2.persist.FormatModuleAccessor;
import org.jwat.common.Diagnosis;
import org.jwat.common.Diagnostics;
import org.jwat.common.HttpHeader;
import org.jwat.common.InputStreamNoSkip;
import org.jwat.common.Payload;
import org.jwat.common.PayloadWithHeaderAbstract;
import org.jwat.common.UriProfile;
import org.jwat.warc.WarcReader;
import org.jwat.warc.WarcReaderFactory;
import org.jwat.warc.WarcRecord;

import com.sleepycat.persist.model.Persistent;

/**
 * JHOVE2 WARC module. This class is mostly a JHOVE2 wrapper that uses
 * the JWAT package for the actual WARC validation.
 *
 * @author nicl
 */
@Persistent
public class WarcModule extends BaseFormatModule implements Validator {

    /** Module version identifier. */
    public static final String VERSION = "2.1.0";

    /** Module release date. */
    public static final String RELEASE = "2013-02-11";

    /** Module validation coverage. */
    public static final Coverage COVERAGE = Coverage.Selective;

    /** Whether to recursively characterize WARC record objects. */
    private boolean recurse = true;

    /** Whether the WARC reader should compute block digests. */
    private boolean bComputeBlockDigest = false;

    /** Block digest algorithm handed to the WARC reader. */
    private String blockDigestAlgorithm;

    /** Block digest encoding handed to the WARC reader. */
    private String blockDigestEncoding;

    /** Whether the WARC reader should compute payload digests. */
    private boolean bComputePayloadDigest = false;

    /** Payload digest algorithm handed to the WARC reader. */
    private String payloadDigestAlgorithm;

    /** Payload digest encoding handed to the WARC reader. */
    private String payloadDigestEncoding;

    /** Whether WARC-Target-URI values are validated with the strict profile. */
    private boolean bStrictTargetUriValidation = false;

    /** Whether all other URI values are validated with the strict profile. */
    private boolean bStrictUriValidation = false;

    /**
     * Stores a mapping of all Format aliases in the
     * {@link org.jhove2.core.I8R.Namespace.MIME MIME namespace}
     * available via configuration to the JHOVE2
     * {@link org.jhove2.core.I8R} for that Format
     */
    private static transient Map<String, FormatIdentification> jhove2Ids = null;

    /** Validation status. */
    private Validity isValid;

    /** Number of records seen by this instance. */
    private int warcRecordNumber;

    /** WARC filename, from metadata record, if present. */
    private String warcFileName;

    /** WARC file size, whole file. */
    private Long warcFileSize;

    /** Last modified date of the WARC file. */
    private Date warcFileLastModified;

    /** The amount of bytes consumed by the WarcReader. */
    private long warcReaderConsumedBytes;

    /** File version, null unless all records have the same version. */
    private String warcFileVersion;

    /** Versions encountered and their usage count. */
    private Map<String, Integer> versions = new TreeMap<String, Integer>();

    /**
     * Instantiate a new <code>WarcModule</code> instance.
     * This constructor is used by the Spring framework.
     * @param format Jhove2 Format used by this module to handle WARC
     * @param formatModuleAccessor FormatModuleAccessor to manage access to Format Profiles
     */
    public WarcModule(Format format, FormatModuleAccessor formatModuleAccessor) {
        super(VERSION, RELEASE, RIGHTS, format, formatModuleAccessor);
        this.isValid = Validity.Undetermined;
    }

    /**
     * Instantiate a new <code>WarcModule</code> instance.
     * This constructor is used by the persistence layer.
     */
    public WarcModule() {
        this(null, null);
    }

    /**
     * Method for creating test instances. The new instance shares this
     * module's format and accessor and copies all configuration switches;
     * per-file state (record count, versions, file info) starts out fresh.
     * @return <code>WarcModule</code> instance
     */
    protected WarcModule getTestInstance() {
        WarcModule warcModule = new WarcModule(format, (FormatModuleAccessor)moduleAccessor);
        warcModule.isValid = Validity.Undetermined;
        warcModule.recurse = recurse;
        warcModule.bComputeBlockDigest = bComputeBlockDigest;
        warcModule.blockDigestAlgorithm = blockDigestAlgorithm;
        warcModule.blockDigestEncoding = blockDigestEncoding;
        warcModule.bComputePayloadDigest = bComputePayloadDigest;
        warcModule.payloadDigestAlgorithm = payloadDigestAlgorithm;
        warcModule.payloadDigestEncoding = payloadDigestEncoding;
        warcModule.bStrictTargetUriValidation = bStrictTargetUriValidation;
        warcModule.bStrictUriValidation = bStrictUriValidation;
        return warcModule;
    }

    //------------------------------------------------------------------------
    // BaseFormatModule contract support
    //------------------------------------------------------------------------

    /**
     * Parse a WARC file/record.
     * <p>
     * When the parent source carries a {@link GzipModule}, the source is
     * treated as one GZip entry of a compressed WARC file: the reader is
     * shared through the GZip module and state is accumulated on the
     * <code>WarcModule</code> attached to the parent source. Otherwise the
     * source is read as a plain, uncompressed WARC file.
     * @param jhove2 the JHove2 characterization context
     * @param source WARC source unit
     * @param input WARC source input
     * @return number of bytes consumed by the WARC reader
     * @throws EOFException if EOF occurs prematurely
     * @throws IOException if an IO error occurs while processing
     * @throws JHOVE2Exception if a serious problem needs to be reported
     * @see org.jhove2.module.format.BaseFormatModule#parse(org.jhove2.core.JHOVE2, org.jhove2.core.source.Source, org.jhove2.core.io.Input)
     */
    @Override
    public long parse(JHOVE2 jhove2, Source source, Input input)
            throws IOException, EOFException, JHOVE2Exception {
        /*
         * Cache Content-Types to JHove2 FormatIdentifications.
         */
        if (jhove2Ids == null) {
            Map<String,String> ids =
                    jhove2.getConfigInfo().getFormatAliasIdsToJ2Ids(I8R.Namespace.MIME);
            TreeMap<String, FormatIdentification> idsTemp =
                    new TreeMap<String, FormatIdentification>();
            for (Entry<String, String> e : ids.entrySet()) {
                // Locale-independent lowercasing so keys match the lookup
                // in processRecord regardless of the default locale.
                idsTemp.put(e.getKey().toLowerCase(Locale.ROOT),
                        new FormatIdentification(new I8R(e.getValue()),
                                Confidence.Tentative));
            }
            jhove2Ids = Collections.unmodifiableSortedMap(idsTemp);
        }
        /*
         * SourceFactory for later use.
         */
        SourceFactory sourceFactory = jhove2.getSourceFactory();
        if (sourceFactory == null) {
            throw new JHOVE2Exception("INTERNAL ERROR - JHOVE2 SourceFactory is null");
        }
        /*
         * Module init.
         */
        long consumed = 0L;
        isValid = Validity.Undetermined;
        // No effect unless read methods on the input object are called.
        input.setByteOrder(ByteOrder.LITTLE_ENDIAN);
        /*
         * Module context.
         */
        GzipModule gzipMod = null;
        WarcModule warcMod = null;
        Module mod;
        Source parentSrc = source.getParentSource();
        if (parentSrc != null) {
            List<Module> parentMods = parentSrc.getModules();
            for (int i=0; i<parentMods.size(); ++i) {
                mod = parentMods.get(i);
                if (mod instanceof GzipModule) {
                    gzipMod = (GzipModule)mod;
                    // Look up the GzipModule which is on the call stack.
                    // Required since the JHove2 lookup returns a new instance
                    // populated with persisted values and not the instance on
                    // the call stack.
                    gzipMod = GzipModule.gzipMap.get(gzipMod.instanceId);
                }
                if (mod instanceof WarcModule) {
                    // The same goes for the WarcModule except we do not need
                    // any transient fields here.
                    warcMod = (WarcModule)mod;
                }
            }
        }
        /*
         * Reportable: Filename, file size, etc.
         */
        if (!source.isTemp()) {
            warcFileName = source.getFile().getName();
            warcFileSize = source.getFile().length();
            warcFileLastModified = new Date(source.getFile().lastModified());
        } else if (parentSrc != null && !parentSrc.isTemp()) {
            warcFileName = parentSrc.getFile().getName();
            warcFileSize = parentSrc.getFile().length();
            warcFileLastModified = new Date(parentSrc.getFile().lastModified());
        }
        /*
         * Read some WARC records.
         */
        WarcReader reader = null;
        if (gzipMod != null) {
            // Obtain GZip startOffset from dummy property.
            long offset = -1;
            List<Reportable> gzipProps = source.getExtraProperties();
            Reportable prop;
            int i = 0;
            while (i<gzipProps.size()) {
                prop = gzipProps.get(i);
                if (prop instanceof GZipOffsetProperty) {
                    offset = ((GZipOffsetProperty)prop).offset;
                    // Removing shifts the following elements down, so the
                    // index is intentionally not advanced in this branch.
                    gzipProps.remove(i);
                    // ...
                    source.getSourceAccessor().persistSource(source);
                } else {
                    ++i;
                }
            }
            // Better safe than sorry.
            gzipMod.presumptiveFormat = new FormatIdentification(
                    format.getIdentifier(), Confidence.Tentative);
            /*
             * GZip compressed.
             */
            reader = (WarcReader)gzipMod.reader;
            if (reader == null) {
                reader = WarcReaderFactory.getReaderUncompressed();
                setReaderOptions(reader);
                gzipMod.reader = reader;
            }
            if (warcMod == null) {
                /*
                 * First record. (Unless the parent modules are not correct!)
                 */
                mod = parentSrc.addModule(this);
                parseRecordsCompressed(jhove2, sourceFactory, source, reader, offset);
            } else {
                warcMod.parseRecordsCompressed(jhove2, sourceFactory, source, reader, offset);
                // Validity
                warcMod.updateValidity(reader);
                // Reportable.
                warcMod.warcReaderConsumedBytes = reader.getConsumed();
                warcMod.updateFileVersion();
                warcMod = (WarcModule)warcMod.getModuleAccessor().persistModule(warcMod);
                // Remove WarcModule from source instance since we added one to the parent source.
                // this.setParentSourceId(null);
                this.getParentSource().deleteModule(this);
                // Reader diagnostics.
                reportValidationErrors(source, reader.diagnostics, jhove2);
                reader.diagnostics.reset();
                // Source update.
                source = source.getSourceAccessor().persistSource(source);
            }
            consumed = reader.getConsumed();
        } else {
            /*
             * Not GZip compressed.
             */
            reader = WarcReaderFactory.getReaderUncompressed(
                    new InputStreamNoSkip(source.getInputStream()), 8192);
            try {
                setReaderOptions(reader);
                parseRecordsUncompressed(jhove2, sourceFactory, source, reader);
            } finally {
                // Always release the reader (and the stream it wraps), even
                // when configuration or record parsing fails.
                reader.close();
            }
            consumed = reader.getConsumed();
            // Reader diagnostics.
            reportValidationErrors(source, reader.diagnostics, jhove2);
            reader.diagnostics.reset();
            // Reportable.
            warcReaderConsumedBytes = reader.getConsumed();
            updateFileVersion();
            /*
             * Validity.
             */
            updateValidity(reader);
        }
        /*
         * Consumed.
         */
        return consumed;
    }

    /**
     * Update the validity of this module from the reader's compliance state.
     * A validity of <code>False</code> is never overwritten.
     * @param reader WARC reader that parsed the records
     */
    private void updateValidity(WarcReader reader) {
        if (isValid != Validity.False) {
            isValid = reader.isCompliant() ? Validity.True : Validity.False;
        }
    }

    /**
     * Set the file version when exactly one version was encountered and
     * every record seen by this instance carried it.
     */
    private void updateFileVersion() {
        if (versions.size() == 1) {
            Entry<String, Integer> entry = versions.entrySet().iterator().next();
            if (entry.getValue().intValue() == warcRecordNumber) {
                warcFileVersion = entry.getKey();
            }
        }
    }

    /**
     * Set digest and URI validation options for WARC reader.
     * @param reader WARC reader instance
     * @throws JHOVE2Exception if the reader rejects the configured block or
     * payload digest algorithm
     */
    protected void setReaderOptions(WarcReader reader) throws JHOVE2Exception {
        reader.setBlockDigestEnabled(bComputeBlockDigest);
        reader.setPayloadDigestEnabled(bComputePayloadDigest);
        if (!reader.setBlockDigestAlgorithm(blockDigestAlgorithm)) {
            throw new JHOVE2Exception("Invalid block digest algorithm: " + blockDigestAlgorithm);
        }
        if (!reader.setPayloadDigestAlgorithm(payloadDigestAlgorithm)) {
            throw new JHOVE2Exception("Invalid payload digest algorithm: " + payloadDigestAlgorithm);
        }
        reader.setBlockDigestEncoding(blockDigestEncoding);
        reader.setPayloadDigestEncoding(payloadDigestEncoding);
        if (bStrictTargetUriValidation) {
            reader.setWarcTargetUriProfile(UriProfile.RFC3986);
        } else {
            reader.setWarcTargetUriProfile(UriProfile.RFC3986_ABS_16BIT_LAX);
        }
        if (bStrictUriValidation) {
            reader.setUriProfile(UriProfile.RFC3986);
        } else {
            reader.setUriProfile(UriProfile.RFC3986_ABS_16BIT_LAX);
        }
    }

    /**
     * Parse WARC records that are not encased in GZip entries. Parsing
     * should be straightforward with all records accessible through the same
     * source.
     * @param jhove2 the JHove2 characterization context
     * @param sourceFactory JHove2 source factory
     * @param parentSource WARC source unit
     * @param reader WARC reader used to parse records
     * @throws EOFException if EOF occurs prematurely
     * @throws IOException if an IO error occurs while processing
     * @throws JHOVE2Exception if a serious problem needs to be reported
     */
    protected void parseRecordsUncompressed(JHOVE2 jhove2, SourceFactory sourceFactory,
            Source parentSource, WarcReader reader)
            throws EOFException, IOException, JHOVE2Exception {
        WarcRecord record;
        // Ensure a WARC reader could be instantiated.
        if (reader != null) {
            parentSource.setIsAggregate(true);
            /*
             * Loop through available records.
             */
            while ((record = reader.getNextRecord()) != null) {
                processRecord(jhove2, sourceFactory, parentSource, record);
            }
        } else {
            throw new JHOVE2Exception("WarcReader is null");
        }
    }

    /**
     * Parse WARC record(s) where the source has been identified as a source of
     * a GZip module instance. Since each record will presumably be parsed from
     * a different source, alternative methods in the WARC reader will be used.
     * @param jhove2 the JHove2 characterization context
     * @param sourceFactory JHove2 source factory
     * @param parentSource WARC source unit
     * @param reader WARC reader used to parse records
     * @param offset record offset relative to input stream
     * @throws EOFException if EOF occurs prematurely
     * @throws IOException if an IO error occurs while processing
     * @throws JHOVE2Exception if a serious problem needs to be reported
     */
    protected void parseRecordsCompressed(JHOVE2 jhove2, SourceFactory sourceFactory,
            Source parentSource, WarcReader reader, Long offset)
            throws EOFException, IOException, JHOVE2Exception {
        WarcRecord record;
        // Ensure a WARC reader could be instantiated.
        if (reader != null) {
            parentSource.setIsAggregate(true);
            InputStream in = parentSource.getInputStream();
            /*
             * Loop through available records.
             */
            while ((record = reader.getNextRecordFrom(in, offset, 8192)) != null) {
                processRecord(jhove2, sourceFactory, parentSource, record);
            }
        } else {
            throw new JHOVE2Exception("WarcReader is null");
        }
    }

    /**
     * Process a WARC record. Adds a <code>WarcRecordSource</code> child
     * to the supplied input source. Relevant reportable properties are added
     * to the <code>WarcRecordSource</code>. If there is a payload present in
     * the record, steps are taken to characterize it. The content-type of the
     * payload is established from the record itself or a leading
     * http response. The content-type is added as a presumptive format on the
     * embedded source.
     * @param jhove2 the JHove2 characterization context
     * @param sourceFactory JHove2 source factory
     * @param parentSource WARC source unit
     * @param record WARC record from WARC reader
     * @throws EOFException if EOF occurs prematurely
     * @throws IOException if an IO error occurs while processing
     * @throws JHOVE2Exception if a serious problem needs to be reported
     */
    protected void processRecord(JHOVE2 jhove2, SourceFactory sourceFactory,
            Source parentSource, WarcRecord record)
            throws EOFException, IOException, JHOVE2Exception {
        Payload payload;
        PayloadWithHeaderAbstract payloadHeaderWrapped;
        HttpHeader httpHeader;
        InputStream payload_stream;
        WarcRecordData recordData;
        String contentType;
        FormatIdentification formatId;

        contentType = record.header.contentTypeStr;
        /*
         * WARC Record Source.
         */
        Source recordSrc = new WarcRecordSource();
        recordSrc.setSourceAccessor(sourceFactory.createSourceAccessor(recordSrc));
        recordSrc.setDeleteTempFileOnClose(jhove2.getInvocation().getDeleteTempFilesOnClose());
        recordSrc = parentSource.addChildSource(recordSrc);
        ++warcRecordNumber;
        /*
         * Version.
         */
        if (record.header.bValidVersionFormat) {
            Integer count = versions.get(record.header.versionStr);
            if (count == null) {
                count = 0;
            }
            ++count;
            versions.put(record.header.versionStr, count);
        }
        /*
         * Prepare payload.
         */
        payload = record.getPayload();
        httpHeader = null;
        payload_stream = null;
        if (payload != null) {
            payloadHeaderWrapped = payload.getPayloadHeaderWrapped();
            if (payloadHeaderWrapped instanceof HttpHeader) {
                httpHeader = (HttpHeader)payloadHeaderWrapped;
            }
            if (httpHeader == null) {
                payload_stream = payload.getInputStream();
            } else {
                // With an HTTP header present, its content-type replaces the
                // one from the WARC header (even when it is null).
                contentType = httpHeader.getProtocolContentType();
                payload_stream = httpHeader.getPayloadInputStream();
            }
        }
        /*
         * Decide on Jhove2 format from contentType information.
         */
        if (contentType != null) {
            // Strip parameters such as "; charset=...".
            int idx = contentType.indexOf(';');
            if (idx >= 0) {
                contentType = contentType.substring(0, idx);
            }
        }
        formatId = null;
        if (contentType != null) {
            // Trim optional whitespace around the media type and lowercase
            // independently of the default locale to match the cached keys.
            formatId = jhove2Ids.get(contentType.trim().toLowerCase(Locale.ROOT));
        }
        /*
         * Characterize payload.
         */
        try {
            if (recurse && payload_stream != null) {
                characterizePayload(jhove2, sourceFactory, recordSrc, payload_stream, formatId);
            }
        } finally {
            // Release the payload stream even if characterization fails.
            if (payload_stream != null) {
                payload_stream.close();
            }
        }
        /*
         * Close record to finish validation.
         */
        if (payload != null) {
            payload.close();
        }
        record.close();
        /*
         * Properties.
         */
        recordData = new WarcRecordData(record);
        recordSrc.addExtraProperties(recordData.getWarcRecordBaseProperties());
        recordSrc.addExtraProperties(recordData.getWarcTypeProperties(record));
        recordSrc.close();
        /*
         * Report errors.
         */
        reportValidationErrors(recordSrc, record.diagnostics, jhove2);
    }

    /**
     * Process a WARC record payload, recursively if configured to do so.
     * @param jhove2 the JHove2 characterization context
     * @param sourceFactory JHove2 source factory
     * @param recordSrc WARC record source unit
     * @param payload_stream payload inputstream
     * @param formatId JHove2 format identification based on contentType,
     * or <code>null</code> if no presumptive format should be added
     * @throws EOFException if EOF occurs prematurely
     * @throws IOException if an IO error occurs while processing
     * @throws JHOVE2Exception if a serious problem needs to be reported
     */
    protected void characterizePayload(JHOVE2 jhove2, SourceFactory sourceFactory,
            Source recordSrc, InputStream payload_stream, FormatIdentification formatId)
            throws EOFException, IOException, JHOVE2Exception {
        // Not all properties are ready yet, they are added as extras.
        String name = null;
        Source payloadSrc = sourceFactory.getSource(jhove2, payload_stream, name, null);
        if (payloadSrc != null) {
            payloadSrc.setDeleteTempFileOnClose(jhove2.getInvocation().getDeleteTempFilesOnClose());
            payloadSrc = recordSrc.addChildSource(payloadSrc);
            // Add presumptive format based on content-type.
            if (formatId != null) {
                payloadSrc = payloadSrc.addPresumptiveFormat(formatId);
            }
            /* Make sure to close the Input after
             * characterization is completed.
             */
            Input src_input = payloadSrc.getInput(jhove2);
            try {
                payloadSrc = jhove2.characterize(payloadSrc, src_input);
            } finally {
                if (src_input != null) {
                    src_input.close();
                }
            }
            payloadSrc.close();
        }
    }

    /**
     * Reports validation errors/warnings on <code>Source</code>, if any.
     * @param src WARC source unit
     * @param diagnostics diagnostics object with possible errors/warnings.
     * @param jhove2 the JHove2 characterization context.
     * @throws IOException if an IO error occurs while processing
     * @throws JHOVE2Exception if a serious problem needs to be reported
     */
    private void reportValidationErrors(Source src, Diagnostics<Diagnosis> diagnostics,
            JHOVE2 jhove2) throws JHOVE2Exception, IOException {
        if (diagnostics.hasErrors()) {
            // Report errors on source object.
            for (Diagnosis d : diagnostics.getErrors()) {
                // The lowercased diagnosis type is part of a message key, so
                // it must not vary with the default locale.
                src.addMessage(newValidityError(jhove2, Message.Severity.ERROR,
                        d.type.toString().toLowerCase(Locale.ROOT), d.getMessageArgs()));
            }
        }
        if (diagnostics.hasWarnings()) {
            // Report warnings on source object.
            for (Diagnosis d : diagnostics.getWarnings()) {
                src.addMessage(newValidityError(jhove2, Message.Severity.WARNING,
                        d.type.toString().toLowerCase(Locale.ROOT), d.getMessageArgs()));
            }
        }
    }

    /**
     * Instantiates a new localized message.
     * @param jhove2 the JHove2 characterization context.
     * @param severity message severity
     * @param id the configuration property relative name
     * @param messageArgs the values to add in the message
     * @return the new localized message
     * @throws JHOVE2Exception if a serious problem needs to be reported
     */
    private Message newValidityError(JHOVE2 jhove2, Severity severity, String id,
            Object[] messageArgs) throws JHOVE2Exception {
        return new Message(severity, Message.Context.OBJECT,
                this.getClass().getName() + '.' + id, messageArgs,
                jhove2.getConfigInfo());
    }

    //------------------------------------------------------------------------
    // Validator interface support
    //------------------------------------------------------------------------

    /**
     * Validate the WARC file, which in this case amounts to returning the
     * result since validation has already been done.
     * @param jhove2 JHOVE2 framework object
     * @param source WARC file source unit
     * @param input WARC file source input
     * @return validation status established while parsing
     * @see org.jhove2.module.format.Validator#validate(org.jhove2.core.JHOVE2, org.jhove2.core.source.Source, org.jhove2.core.io.Input)
     */
    @Override
    public Validity validate(JHOVE2 jhove2, Source source, Input input)
            throws JHOVE2Exception {
        return isValid();
    }

    /**
     * Get validation coverage.
     * @return Validation coverage
     * @see org.jhove2.module.format.Validator#getCoverage()
     */
    @Override
    public Coverage getCoverage() {
        return COVERAGE;
    }

    /**
     * Get validity.
     * @return Validity
     * @see org.jhove2.module.format.Validator#isValid()
     */
    @Override
    public Validity isValid() {
        return isValid;
    }

    //------------------------------------------------------------------------
    // Reportable properties
    //------------------------------------------------------------------------

    /**
     * Returns the WARC filename.
     * @return the WARC filename
     */
    @ReportableProperty(order=1, value="WARC filename")
    public String getWarcFileName() {
        return warcFileName;
    }

    /**
     * Returns the size of the WARC file.
     * @return the size of the WARC file, or <code>-1</code> if no size has
     * been recorded (parsing only records it when the source or its parent
     * is a non-temporary file)
     */
    @ReportableProperty(order=2, value="WARC file size, in bytes")
    public long getWarcFileSize() {
        // The backing field is a nullable Long; guard against unboxing null.
        return (warcFileSize != null) ? warcFileSize.longValue() : -1L;
    }

    /**
     * Returns WARC file last modified date.
     * @return WARC file last modified date
     */
    @ReportableProperty(order=3, value="WARC file last modified date")
    public Date getLastModified() {
        return warcFileLastModified;
    }

    /**
     * Returns number of WARC records.
     * @return number of WARC records
     */
    @ReportableProperty(order=4, value="The number of WARC records")
    public int getWarcRecordNumber() {
        return warcRecordNumber;
    }

    /**
     * Returns the number of bytes consumed by the WARC reader.
     * @return the number of bytes consumed by the WARC reader
     */
    @ReportableProperty(order=5, value="WARC reader consumed bytes, in bytes")
    public long getWarcReaderConsumedBytes() {
        return warcReaderConsumedBytes;
    }

    /**
     * Returns the file version if it is the same for all records.
     * @return the file version
     */
    @ReportableProperty(order=6, value="File version")
    public String getFileVersion() {
        return warcFileVersion;
    }

    //------------------------------------------------------------------------
    // Specific implementation
    //------------------------------------------------------------------------

    /**
     * Sets whether to recursively characterize WARC record objects.
     * @param recurse whether to recursively characterize WARC record objects.
     */
    public void setRecurse(boolean recurse) {
        this.recurse = recurse;
    }

    /**
     * Enable or disable block digest computation.
     * @param bComputeBlockDigest block digest computation toggle
     */
    public void setComputeBlockDigest(boolean bComputeBlockDigest) {
        this.bComputeBlockDigest = bComputeBlockDigest;
    }

    /**
     * Set the block digest algorithm to be used in case no digest is present
     * in the WARC header.
     * @param blockDigestAlgorithm block digest algorithm
     */
    public void setBlockDigestAlgorithm(String blockDigestAlgorithm) {
        this.blockDigestAlgorithm = blockDigestAlgorithm;
    }

    /**
     * Set the block digest encoding scheme to be used in case no digest
     * is present in the WARC header.
     * @param blockDigestEncoding block digest encoding scheme
     */
    public void setBlockDigestEncoding(String blockDigestEncoding) {
        this.blockDigestEncoding = blockDigestEncoding;
    }

    /**
     * Enable or disable payload digest computation.
     * @param bComputePayloadDigest payload digest computation toggle
     */
    public void setComputePayloadDigest(boolean bComputePayloadDigest) {
        this.bComputePayloadDigest = bComputePayloadDigest;
    }

    /**
     * Set the payload digest algorithm to be used in case no digest is present
     * in the WARC header.
     * @param payloadDigestAlgorithm payload digest algorithm
     */
    public void setPayloadDigestAlgorithm(String payloadDigestAlgorithm) {
        this.payloadDigestAlgorithm = payloadDigestAlgorithm;
    }

    /**
     * Set the payload digest encoding scheme to be used in case no digest
     * is present in the WARC header.
     * @param payloadDigestEncoding payload digest encoding scheme
     */
    public void setPayloadDigestEncoding(String payloadDigestEncoding) {
        this.payloadDigestEncoding = payloadDigestEncoding;
    }

    /**
     * Enable or disable strict Target URI validation.
     * @param bStrictTargetUriValidation enable strict target URI validation switch
     */
    public void setStrictTargetUriValidation(boolean bStrictTargetUriValidation) {
        this.bStrictTargetUriValidation = bStrictTargetUriValidation;
    }

    /**
     * Enable or disable strict URI validation.
     * @param bStrictUriValidation enable strict URI validation switch
     */
    public void setStrictUriValidation(boolean bStrictUriValidation) {
        this.bStrictUriValidation = bStrictUriValidation;
    }

}