/** * JHOVE2 - Next-generation architecture for format-aware characterization * * Copyright (c) 2009 by The Regents of the University of California, * Ithaka Harbors, Inc., and The Board of Trustees of the Leland Stanford * Junior University. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * o Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * o Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * o Neither the name of the University of California/California Digital * Library, Ithaka Harbors/Portico, or Stanford University, nor the names of * its contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
*/
package org.jhove2.module.format.arc;

import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteOrder;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.TreeMap;

import org.jhove2.annotation.ReportableProperty;
import org.jhove2.core.I8R;
import org.jhove2.core.JHOVE2;
import org.jhove2.core.JHOVE2Exception;
import org.jhove2.core.Message;
import org.jhove2.core.Message.Severity;
import org.jhove2.core.format.Format;
import org.jhove2.core.format.FormatIdentification;
import org.jhove2.core.format.FormatIdentification.Confidence;
import org.jhove2.core.io.Input;
import org.jhove2.core.reportable.Reportable;
import org.jhove2.core.source.Source;
import org.jhove2.core.source.SourceFactory;
import org.jhove2.module.Module;
import org.jhove2.module.format.BaseFormatModule;
import org.jhove2.module.format.Validator;
import org.jhove2.module.format.arc.properties.ArcRecordData;
import org.jhove2.module.format.gzip.GzipModule;
import org.jhove2.module.format.gzip.GzipModule.GZipOffsetProperty;
import org.jhove2.persist.FormatModuleAccessor;

import org.jwat.arc.ArcReader;
import org.jwat.arc.ArcReaderFactory;
import org.jwat.arc.ArcRecordBase;
import org.jwat.common.Diagnosis;
import org.jwat.common.Diagnostics;
import org.jwat.common.HttpHeader;
import org.jwat.common.InputStreamNoSkip;
import org.jwat.common.Payload;
import org.jwat.common.PayloadWithHeaderAbstract;
import org.jwat.common.UriProfile;

import com.sleepycat.persist.model.Persistent;

/**
 * JHOVE2 ARC module. This class is mostly a JHOVE2 wrapper that uses
 * the JWAT package for the actual ARC validation.
 *
 * @author lbihanic, selghissassi, nicl
 */
@Persistent
public class ArcModule extends BaseFormatModule implements Validator {

    /** Module version identifier. */
    public static final String VERSION = "2.1.0";

    /** Module release date. */
    public static final String RELEASE = "2013-02-11";

    /** Module validation coverage. */
    public static final Coverage COVERAGE = Coverage.Selective;

    /** Whether to recursively characterize ARC record objects. */
    private boolean recurse = true;

    /** Whether the ARC reader should compute a digest of each record block. */
    private boolean bComputeBlockDigest = false;

    /** Block digest algorithm handed to the ARC reader. */
    private String blockDigestAlgorithm;

    /** Block digest encoding scheme handed to the ARC reader. */
    private String blockDigestEncoding;

    /** Whether the ARC reader should compute a digest of each record payload. */
    private boolean bComputePayloadDigest = false;

    /** Payload digest algorithm handed to the ARC reader. */
    private String payloadDigestAlgorithm;

    /** Payload digest encoding scheme handed to the ARC reader. */
    private String payloadDigestEncoding;

    /** Whether record URIs are validated against the strict RFC 3986 profile
     *  (true) or a laxer variant (false). */
    private boolean bStrictUriValidation = true;

    /**
     * Stores a mapping of all Format aliases in the
     * {@link org.jhove2.core.I8R.Namespace.MIME MIME namespace}
     * available via configuration to the JHOVE2
     * {@link org.jhove2.core.I8R} for that Format.
     * Populated lazily (and shared process-wide, hence static transient)
     * on the first call to <code>parse</code>.
     */
    private static transient Map<String, FormatIdentification> jhove2Ids = null;

    /** Validation status. */
    private Validity isValid;

    /** Used protocols, mapped to the number of records using each. */
    private Map<String, Integer> protocols = new HashMap<String, Integer>();

    /** The name of the ARC file. */
    private String arcFileName;

    /** The size of the ARC file, in bytes.
     *  Remains null when no non-temporary file source was seen. */
    private Long arcFileSize;

    /** Last modified date of the ARC file. */
    private Date arcFileLastModified;

    /** The total number of records processed, version blocks included. */
    private int recordNumber;

    /** The number of ARC records (RT_ARC_RECORD only, version blocks excluded). */
    private int arcRecordNumber;

    /** The amount of bytes consumed by the ArcReader. */
    private long arcReaderConsumedBytes;

    /** File version from version block, when present and valid. */
    private String arcFileVersion;

    /** Block description version, null unless all records have the same
     *  block description version. */
    private Integer arcBlockDescVersion;

    /** Block description versions encountered and their usage count. */
    private Map<Integer, Integer> blockDescVersions = new TreeMap<Integer, Integer>();

    /**
     * Instantiate a new <code>ArcModule</code> instance.
     * This constructor is used by the Spring framework.
* @param format Jhove2 Format used by this module to handle ARC
     * @param formatModuleAccessor FormatModuleAccessor to manage access to Format Profiles
     */
    public ArcModule(Format format, FormatModuleAccessor formatModuleAccessor) {
        super(VERSION, RELEASE, RIGHTS, format, formatModuleAccessor);
        this.isValid = Validity.Undetermined;
    }

    /**
     * Instantiate a new <code>ArcModule</code> instance.
     * This constructor is used by the persistence layer.
     */
    public ArcModule() {
        this(null, null);
    }

    /**
     * Method for creating test instances. Copies all configuration settings
     * of this module into a fresh instance with undetermined validity.
     * @return <code>ArcModule</code> instance
     */
    protected ArcModule getTestInstance() {
        ArcModule arcModule = new ArcModule(format, (FormatModuleAccessor)moduleAccessor);
        arcModule.isValid = Validity.Undetermined;
        arcModule.recurse = recurse;
        arcModule.bComputeBlockDigest = bComputeBlockDigest;
        arcModule.blockDigestAlgorithm = blockDigestAlgorithm;
        arcModule.blockDigestEncoding = blockDigestEncoding;
        arcModule.bComputePayloadDigest = bComputePayloadDigest;
        arcModule.payloadDigestAlgorithm = payloadDigestAlgorithm;
        arcModule.payloadDigestEncoding = payloadDigestEncoding;
        arcModule.bStrictUriValidation = bStrictUriValidation;
        return arcModule;
    }

    //------------------------------------------------------------------------
    // BaseFormatModule contract support
    //------------------------------------------------------------------------

    /*
     * Parse an ARC file/record.
     * @see org.jhove2.module.format.BaseFormatModule#parse(org.jhove2.core.JHOVE2, org.jhove2.core.source.Source, org.jhove2.core.io.Input)
     */
    @Override
    public long parse(JHOVE2 jhove2, Source source, Input input)
            throws IOException, EOFException, JHOVE2Exception {
        /*
         * Cache Content-Types to JHove2 FormatIdentifications.
         * Built once from the MIME alias configuration; lookup keys are
         * lower-cased content-type strings.
         */
        if (jhove2Ids == null) {
            Map<String,String> ids = jhove2.getConfigInfo().getFormatAliasIdsToJ2Ids(I8R.Namespace.MIME);
            TreeMap<String, FormatIdentification> idsTemp = new TreeMap<String, FormatIdentification>();
            for (Entry<String, String> e : ids.entrySet()) {
                idsTemp.put(e.getKey().toLowerCase(),
                        new FormatIdentification(new I8R(e.getValue()), Confidence.Tentative));
            }
            jhove2Ids = Collections.unmodifiableSortedMap(idsTemp);
        }
        /*
         * SourceFactory for later use.
         */
        SourceFactory sourceFactory = jhove2.getSourceFactory();
        if (sourceFactory == null) {
            throw new JHOVE2Exception("INTERNAL ERROR - JHOVE2 SourceFactory is null");
        }
        /*
         * Module init.
         */
        long consumed = 0L;
        isValid = Validity.Undetermined;
        // No effect unless read methods on the input object are called.
        input.setByteOrder(ByteOrder.LITTLE_ENDIAN);
        /*
         * Module context. Look for GzipModule/ArcModule instances attached to
         * the parent source to detect whether this source is a GZip entry of
         * an ARC file that is already being processed.
         */
        GzipModule gzipMod = null;
        ArcModule arcMod = null;
        Module mod;
        Source parentSrc = source.getParentSource();
        if (parentSrc != null) {
            List<Module> parentMods = parentSrc.getModules();
            for (int i=0; i<parentMods.size(); ++i) {
                mod = parentMods.get(i);
                if (mod instanceof GzipModule) {
                    gzipMod = (GzipModule)mod;
                    // Lookup the GZipModule which is on the call stack.
                    // Required since the JHove2 lookup returns a new instance
                    // populated with persisted values and not the instance on
                    // the call stack.
                    gzipMod = GzipModule.gzipMap.get(gzipMod.instanceId);
                }
                if (mod instanceof ArcModule) {
                    // The same goes for the ArcModule except we do not need
                    // any transient fields here.
                    arcMod = (ArcModule)mod;
                }
            }
        }
        /*
         * Reportable: Filename, file size, etc. Taken from this source when
         * it is backed by a real file, else from a non-temporary parent.
         */
        if (!source.isTemp()) {
            arcFileName = source.getFile().getName();
            arcFileSize = source.getFile().length();
            arcFileLastModified = new Date(source.getFile().lastModified());
        } else if (parentSrc != null && !parentSrc.isTemp()) {
            arcFileName = parentSrc.getFile().getName();
            arcFileSize = parentSrc.getFile().length();
            arcFileLastModified = new Date(parentSrc.getFile().lastModified());
        }
        /*
         * Read some ARC records.
         */
        ArcReader reader = null;
        if (gzipMod != null) {
            // Obtain GZip startOffset from dummy property, then remove the
            // dummy property and persist the source without it.
            long offset = -1;
            List<Reportable> gzipProps = source.getExtraProperties();
            Reportable prop;
            int i = 0;
            while (i<gzipProps.size()) {
                prop = gzipProps.get(i);
                if (prop instanceof GZipOffsetProperty) {
                    offset = ((GZipOffsetProperty)prop).offset;
                    gzipProps.remove(i);
                    source.getSourceAccessor().persistSource(source);
                } else {
                    ++i;
                }
            }
            // This should probably be changed according to success reading VersionBlock.
            gzipMod.presumptiveFormat = new FormatIdentification(format.getIdentifier(),
                    Confidence.Tentative);
            /*
             * GZip compressed. Reuse the reader cached on the GzipModule when
             * present so reader state spans all entries of the GZip file.
             */
            reader = (ArcReader)gzipMod.reader;
            if (reader == null) {
                reader = ArcReaderFactory.getReaderUncompressed();
                setReaderOptions(reader);
                gzipMod.reader = reader;
            }
            if (arcMod == null) {
                /*
                 * First record. (Unless the parent modules are not correct!)
                 * Attach this module to the parent source so later entries
                 * find and reuse it via the module-context lookup above.
                 */
                mod = parentSrc.addModule(this);
                parseRecordsCompressed(jhove2, sourceFactory, source, reader, offset, true);
            } else {
                // Subsequent entry: delegate to the ArcModule already attached
                // to the parent source so counters aggregate in one instance.
                arcMod.parseRecordsCompressed(jhove2, sourceFactory, source, reader, offset, false);
                // Validity.
                if (arcMod.isValid != Validity.False) {
                    if (reader.isCompliant()) {
                        arcMod.isValid = Validity.True;
                    } else {
                        arcMod.isValid = Validity.False;
                    }
                }
                arcMod.arcReaderConsumedBytes = reader.getConsumed();
                // Report a single block description version only when every
                // record used the same one.
                if (arcMod.blockDescVersions.size() == 1) {
                    Entry<Integer, Integer> entry = arcMod.blockDescVersions.entrySet().iterator().next();
                    if (entry.getValue() == arcMod.recordNumber) {
                        arcMod.arcBlockDescVersion = entry.getKey();
                    }
                }
                arcMod = (ArcModule)arcMod.getModuleAccessor().persistModule(arcMod);
                // Remove ArcModule from source instance since we added one to the parent source.
                // this.setParentSourceId(null);
                this.getParentSource().deleteModule(this);
                // Reader diagnostics.
                reportValidationErrors(source, reader.diagnostics, jhove2);
                reader.diagnostics.reset();
                // Source update.
                source = source.getSourceAccessor().persistSource(source);
            }
            consumed = reader.getConsumed();
        } else {
            /*
             * Not GZip compressed. Read all records sequentially from this
             * source's input stream.
             */
            reader = ArcReaderFactory.getReaderUncompressed(new InputStreamNoSkip(source.getInputStream()), 8192);
            setReaderOptions(reader);
            parseRecordsUncompressed(jhove2, sourceFactory, source, reader, true);
            reader.close();
            consumed = reader.getConsumed();
            // Reader diagnostics.
            reportValidationErrors(source, reader.diagnostics, jhove2);
            reader.diagnostics.reset();
            // Reportable.
            arcReaderConsumedBytes = reader.getConsumed();
            if (blockDescVersions.size() == 1) {
                Entry<Integer, Integer> entry = blockDescVersions.entrySet().iterator().next();
                if (entry.getValue() == recordNumber) {
                    arcBlockDescVersion = entry.getKey();
                }
            }
            /*
             * Validity.
             */
            if (isValid != Validity.False) {
                if (reader.isCompliant()) {
                    isValid = Validity.True;
                } else {
                    isValid = Validity.False;
                }
            }
        }
        /*
         * Consumed.
         */
        return consumed;
    }

    /**
     * Set digest options for ARC reader.
* @param reader ARC reader instance */ protected void setReaderOptions(ArcReader reader) throws JHOVE2Exception { reader.setBlockDigestEnabled(bComputeBlockDigest); reader.setPayloadDigestEnabled(bComputePayloadDigest); if (!reader.setBlockDigestAlgorithm(blockDigestAlgorithm)) { throw new JHOVE2Exception("Invalid block digest algorithm: " + blockDigestAlgorithm); } if (!reader.setPayloadDigestAlgorithm(payloadDigestAlgorithm)) { throw new JHOVE2Exception("Invalid payload digest algorithm: " + payloadDigestAlgorithm); } reader.setBlockDigestEncoding(blockDigestEncoding); reader.setPayloadDigestEncoding(payloadDigestEncoding); if (bStrictUriValidation) { reader.setUriProfile(UriProfile.RFC3986); } else { reader.setUriProfile(UriProfile.RFC3986_ABS_16BIT_LAX); } } /** * Parse ARC records that are not encased in GZip entries. Parsing should * should be straight forward with all records accessible through the same * source. The version block is only read if the reader was initialized * during this module call. * @param jhove2 the JHove2 characterization context * @param sourceFactory JHove2 source factory * @param parentSource ARC source unit * @param reader ARC reader used to parse records * @param bReadVersion read version block first or go straight to parsing records * @throws EOFException if EOF occurs prematurely * @throws IOException if an IO error occurs while processing * @throws JHOVE2Exception if a serious problem needs to be reported */ protected void parseRecordsUncompressed(JHOVE2 jhove2, SourceFactory sourceFactory, Source parentSource, ArcReader reader, boolean bReadVersion) throws EOFException, IOException, JHOVE2Exception { ArcRecordBase record; // Ensure a ARC reader could be instantiated. if (reader != null) { parentSource.setIsAggregate(true); /* * Loop through available records. 
*/ while ((record = reader.getNextRecord()) != null) { processRecord(jhove2, sourceFactory, parentSource, record); } } else { throw new JHOVE2Exception("ArcReader is null"); } } /** * Parse ARC record(s) where the source has been identified as a source of * a GZip module instance. Since each record will presumably be parse from * a different source alternative methods in the ARC reader will be used. * The version block is only read if the reader was initialized * during this module call. * @param jhove2 the JHove2 characterization context * @param sourceFactory JHove2 source factory * @param parentSource ARC source unit * @param reader ARC reader used to parse records * @param offset record offset relative to input stream * @param bReadVersion read version block first or go straight to parsing records * @throws EOFException if EOF occurs prematurely * @throws IOException if an IO error occurs while processing * @throws JHOVE2Exception if a serious problem needs to be reported */ protected void parseRecordsCompressed(JHOVE2 jhove2, SourceFactory sourceFactory, Source parentSource, ArcReader reader, Long offset, boolean bReadVersion) throws EOFException, IOException, JHOVE2Exception { ArcRecordBase record; // Ensure a ARC reader could be instantiated. if (reader != null) { parentSource.setIsAggregate(true); InputStream in = parentSource.getInputStream(); /* * Loop through available records. */ while ((record = reader.getNextRecordFrom(in, offset, 8192)) != null) { processRecord(jhove2, sourceFactory, parentSource, record); } } else { throw new JHOVE2Exception("ArcReader is null"); } } /** * Process an ARC record. Adds an <code>ArcRecordSource</code> child * to the supplied input source. Relevant reportable properties are added * to the <code>ArcRecordSource</code>. If there is a payload present in * the record, steps are taken to characterize it. The content-type of the * payload is established from the record itself or a leading * http response. 
The content-type is added as a presumptive format on the embedded source.
     * @param jhove2 the JHove2 characterization context
     * @param sourceFactory JHove2 source factory
     * @param parentSource ARC source unit
     * @param record ARC record from ARC reader
     * @throws EOFException if EOF occurs prematurely
     * @throws IOException if an IO error occurs while processing
     * @throws JHOVE2Exception if a serious problem needs to be reported
     */
    protected void processRecord(JHOVE2 jhove2, SourceFactory sourceFactory,
            Source parentSource, ArcRecordBase record)
            throws EOFException, IOException, JHOVE2Exception {
        Payload payload;
        PayloadWithHeaderAbstract payloadHeaderWrapped;
        HttpHeader httpHeader;
        InputStream payload_stream;
        ArcRecordData recordData;
        String contentType;
        FormatIdentification formatId;
        // Default content-type comes from the record header; it may be
        // replaced below by the one from a leading HTTP response header.
        contentType = record.header.contentTypeStr;
        /*
         * ARC Record Source.
         */
        Source recordSrc = new ArcRecordSource();
        recordSrc.setSourceAccessor(sourceFactory.createSourceAccessor(recordSrc));
        recordSrc.setDeleteTempFileOnClose(jhove2.getInvocation().getDeleteTempFilesOnClose());
        recordSrc = parentSource.addChildSource(recordSrc);
        ++recordNumber;
        /*
         * Version. (The first ARC record should be a (filedesc) version block record)
         */
        if (recordNumber == 1) {
            if (record.recordType == ArcRecordBase.RT_VERSION_BLOCK) {
                if (record.versionHeader != null && record.versionHeader.bValidVersionFormat) {
                    arcFileVersion = record.versionHeader.versionStr;
                }
            }
        }
        if (record.recordType == ArcRecordBase.RT_ARC_RECORD) {
            ++arcRecordNumber;
        }
        // Count how many records use each block description version.
        Integer count = blockDescVersions.get(record.header.recordFieldVersion);
        if (count == null) {
            count = 0;
        }
        ++count;
        blockDescVersions.put(record.header.recordFieldVersion, count);
        /*
         * Prepare payload. When the payload is wrapped in an HTTP header, the
         * content-type and the payload stream are taken from that header.
         */
        payload = record.getPayload();
        httpHeader = null;
        payload_stream = null;
        if (payload != null) {
            payloadHeaderWrapped = payload.getPayloadHeaderWrapped();
            if (payloadHeaderWrapped instanceof HttpHeader) {
                httpHeader = (HttpHeader)payloadHeaderWrapped;
            }
            if (httpHeader == null) {
                payload_stream = payload.getInputStream();
            } else {
                contentType = httpHeader.getProtocolContentType();
                payload_stream = httpHeader.getPayloadInputStream();
            }
        }
        /*
         * Decide on Jhove2 format from contentType information, stripping any
         * parameters (e.g. "; charset=...") before the case-insensitive lookup.
         */
        if (contentType != null) {
            int idx = contentType.indexOf(';');
            if (idx >= 0) {
                contentType = contentType.substring(0, idx);
            }
        }
        formatId = null;
        if (contentType != null) {
            formatId = jhove2Ids.get(contentType.toLowerCase());
        }
        /*
         * Characterize payload.
         */
        if (recurse && payload_stream != null && !record.hasPseudoEmptyPayload()) {
            characterizePayload(jhove2, sourceFactory, recordSrc, payload_stream, formatId);
        }
        if (payload_stream != null) {
            payload_stream.close();
        }
        /*
         * Close record to finish validation.
         */
        if (payload != null) {
            payload.close();
        }
        record.close();
        /*
         * Properties. Gathered after record.close(), once validation has
         * finished (see comment above).
         */
        recordData = new ArcRecordData(record);
        recordSrc.addExtraProperties(recordData.getArcRecordBaseProperties());
        switch (record.recordType) {
        case ArcRecordBase.RT_VERSION_BLOCK:
            recordSrc.addExtraProperties(recordData.getArcVersionBlockProperties());
            break;
        case ArcRecordBase.RT_ARC_RECORD:
            recordSrc.addExtraProperties(recordData.getArcRecordProperties());
            break;
        }
        // Update protocol statistics.
        if (recordData.protocol != null) {
            updateProtocols(recordData);
        }
        /*
         * Report errors.
         */
        reportValidationErrors(recordSrc, record.diagnostics, jhove2);
        recordSrc.close();
    }

    /**
     * Update the per-protocol usage count with the protocol of one record.
     * @param recordData properties extracted from an ARC record; the caller
     *        guarantees its protocol field is non-null
     */
    protected void updateProtocols(ArcRecordData recordData) {
        int number = 1;
        if (protocols.containsKey(recordData.protocol)) {
            number += protocols.get(recordData.protocol);
        }
        protocols.put(recordData.protocol, number);
    }

    /**
     * Process an ARC record payload, recursively if configured to do so.
* @param jhove2 the JHove2 characterization context
     * @param sourceFactory JHove2 source factory
     * @param recordSrc ARC record source unit
     * @param payload_stream payload inputstream
     * @param formatId JHove2 format identification based on contentType, may be null
     * @throws EOFException if EOF occurs prematurely
     * @throws IOException if an IO error occurs while processing
     * @throws JHOVE2Exception if a serious problem needs to be reported
     */
    protected void characterizePayload(JHOVE2 jhove2, SourceFactory sourceFactory,
            Source recordSrc, InputStream payload_stream, FormatIdentification formatId)
            throws EOFException, IOException, JHOVE2Exception {
        // Not all properties are ready yet, they are added as extras.
        String name = null;
        Source payloadSrc = sourceFactory.getSource(jhove2, payload_stream, name, null);
        if (payloadSrc != null) {
            payloadSrc.setDeleteTempFileOnClose(jhove2.getInvocation().getDeleteTempFilesOnClose());
            payloadSrc = recordSrc.addChildSource(payloadSrc);
            // Add presumptive format based on content-type.
            if (formatId != null) {
                payloadSrc = payloadSrc.addPresumptiveFormat(formatId);
            }
            /* Make sure to close the Input after
             * characterization is completed. */
            Input src_input = payloadSrc.getInput(jhove2);
            try {
                payloadSrc = jhove2.characterize(payloadSrc, src_input);
            } finally {
                if (src_input != null) {
                    src_input.close();
                }
                payloadSrc.close();
            }
        }
    }

    /**
     * Reports validation errors/warnings on <code>Source</code>, if any.
     * Each diagnosis is converted to a localized message keyed by its
     * lower-cased diagnosis type.
     * @param src ARC source unit
     * @param diagnostics diagnostics object with possible errors/warnings.
     * @param jhove2 the JHove2 characterization context.
     * @throws IOException if an IO error occurs while processing
     * @throws JHOVE2Exception if a serious problem needs to be reported
     */
    private void reportValidationErrors(Source src, Diagnostics<Diagnosis> diagnostics,
            JHOVE2 jhove2) throws JHOVE2Exception, IOException {
        if (diagnostics.hasErrors()) {
            // Report errors on source object.
            for (Diagnosis d : diagnostics.getErrors()) {
                src.addMessage(newValidityError(jhove2, Message.Severity.ERROR,
                        d.type.toString().toLowerCase(), d.getMessageArgs()));
                //updateMap(e.error.toString() + '-' + e.field, this.errors);
            }
        }
        if (diagnostics.hasWarnings()) {
            // Report warnings on source object.
            for (Diagnosis d : diagnostics.getWarnings()) {
                src.addMessage(newValidityError(jhove2, Message.Severity.WARNING,
                        d.type.toString().toLowerCase(), d.getMessageArgs()));
            }
        }
    }

    /**
     * Instantiates a new localized message.
     * @param jhove2 the JHove2 characterization context.
     * @param severity message severity
     * @param id the configuration property relative name.
     * @param messageArgs the values to add in the message
     * @return the new localized message
     * @throws JHOVE2Exception if a serious problem needs to be reported
     */
    private Message newValidityError(JHOVE2 jhove2, Severity severity, String id,
            Object[] messageArgs) throws JHOVE2Exception {
        return new Message(severity, Message.Context.OBJECT,
                this.getClass().getName() + '.' + id, messageArgs, jhove2.getConfigInfo());
    }

    //------------------------------------------------------------------------
    // Validator interface support
    //------------------------------------------------------------------------

    /**
     * Validates the ARC file, which in this case amounts to returning the
     * result since validation has already been done during parsing.
     * @param jhove2 the JHove2 characterization context.
     * @param source ARC file source unit.
     * @param input ARC file source input.
     */
    @Override
    public Validity validate(JHOVE2 jhove2, Source source, Input input) throws JHOVE2Exception {
        return this.isValid();
    }

    /**
     * Gets the validation coverage.
     * @return {@link Coverage.Selective selective}, always.
     */
    @Override
    public Coverage getCoverage() {
        return COVERAGE;
    }

    /**
     * Gets ARC file validation status.
     * @return the {@link Validity validity status}.
*/
    @Override
    public Validity isValid() {
        return this.isValid;
    }

    //------------------------------------------------------------------------
    // Reportable properties
    //------------------------------------------------------------------------

    /**
     * Returns the name of the characterized ARC file.
     * @return the ARC file name
     */
    @ReportableProperty(order=1, value="ARC file name")
    public String getArcFileName() {
        return this.arcFileName;
    }

    /**
     * Returns the size of the characterized ARC file.
     * NOTE(review): the backing field is a {@link Long} that stays null when
     * no non-temporary file source was seen; unboxing would then throw a
     * NullPointerException — confirm this is only reported after a parse of
     * a real file.
     * @return the ARC file size, in bytes
     */
    @ReportableProperty(order=2, value="ARC file size, in bytes")
    public long getArcFileSize() {
        return this.arcFileSize;
    }

    /**
     * Returns the last modified date of the ARC file.
     * @return the ARC file last modified date
     */
    @ReportableProperty(order=3, value="ARC file last modified date")
    public Date getLastModified() {
        return this.arcFileLastModified;
    }

    /**
     * Returns the number of ARC records encountered.
     * @return the ARC record count
     */
    @ReportableProperty(order=4, value="The number of ARC records")
    public int getArcRecordNumber() {
        return this.arcRecordNumber;
    }

    /**
     * Returns the number of bytes consumed by the ARC reader.
     * @return bytes consumed by the reader
     */
    @ReportableProperty(order=5, value="ARC reader consumed bytes, in bytes")
    public long getArcReaderConsumedBytes() {
        return this.arcReaderConsumedBytes;
    }

    /**
     * Returns the file version read from the version block.
     * @return the file version
     */
    @ReportableProperty(order=6, value="File version")
    public String getFileVersion() {
        return this.arcFileVersion;
    }

    /**
     * Returns the block description version shared by all records,
     * or null when the records disagree.
     * @return the block description version
     */
    @ReportableProperty(order=7, value="Block description version")
    public Integer getArcBlockDescVersion() {
        return this.arcBlockDescVersion;
    }

    /**
     * Returns the protocols seen in URL records with their usage counts.
     * @return map of protocol name to record count
     */
    @ReportableProperty(order=8, value="URL record protocols")
    public Map<String, Integer> getProtocols() {
        return this.protocols;
    }

    /*
     * Disabled reportable kept for reference; the backing errors map is
     * currently not maintained.
    @ReportableProperty(order=9, value="The number of errors by error type")
    public Map<String,AtomicInteger> getErrors() {
        return errors;
    }
    */

    //------------------------------------------------------------------------
    // Specific implementation
    //------------------------------------------------------------------------

    /**
     * Sets whether to recursively characterize ARC record objects.
     * @param enabled recursion toggle
     */
    public void setRecurse(boolean enabled) {
        this.recurse = enabled;
    }

    /**
     * Enable or disable block digest computation.
     * @param enabled block digest computation toggle
     */
    public void setComputeBlockDigest(boolean enabled) {
        this.bComputeBlockDigest = enabled;
    }

    /**
     * Set the block digest algorithm to be used in case no digest is present
     * in the record header.
     * @param algorithm block digest algorithm
     */
    public void setBlockDigestAlgorithm(String algorithm) {
        this.blockDigestAlgorithm = algorithm;
    }

    /**
     * Set the block digest encoding scheme to be used in case no digest
     * is present in the record header.
     * @param encoding block digest encoding scheme
     */
    public void setBlockDigestEncoding(String encoding) {
        this.blockDigestEncoding = encoding;
    }

    /**
     * Enable or disable payload digest computation.
     * @param enabled payload digest computation toggle
     */
    public void setComputePayloadDigest(boolean enabled) {
        this.bComputePayloadDigest = enabled;
    }

    /**
     * Set the payload digest algorithm to be used in case no digest is
     * present in the record header.
     * @param algorithm payload digest algorithm
     */
    public void setPayloadDigestAlgorithm(String algorithm) {
        this.payloadDigestAlgorithm = algorithm;
    }

    /**
     * Set the payload digest encoding scheme to be used in case no digest
     * is present in the record header.
     * @param encoding payload digest encoding scheme
     */
    public void setPayloadDigestEncoding(String encoding) {
        this.payloadDigestEncoding = encoding;
    }

    /**
     * Enable or disable strict Target URI validation.
     * @param strict strict target URI validation switch
     */
    public void setStrictUriValidation(boolean strict) {
        this.bStrictUriValidation = strict;
    }
}