/**
* JHOVE2 - Next-generation architecture for format-aware characterization
*
* Copyright (c) 2009 by The Regents of the University of California,
* Ithaka Harbors, Inc., and The Board of Trustees of the Leland Stanford
* Junior University.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* o Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* o Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* o Neither the name of the University of California/California Digital
* Library, Ithaka Harbors/Portico, or Stanford University, nor the names of
* its contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
package org.jhove2.module.format.zip;
import java.io.EOFException;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteOrder;
import java.util.ArrayList;
import java.util.Date;
import java.util.Enumeration;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;
import org.jhove2.annotation.ReportableProperty;
import org.jhove2.core.Digest;
import org.jhove2.core.JHOVE2;
import org.jhove2.core.JHOVE2Exception;
import org.jhove2.core.format.Format;
import org.jhove2.core.io.Input;
import org.jhove2.core.source.DirectorySource;
import org.jhove2.core.source.Source;
import org.jhove2.core.source.SourceFactory;
import org.jhove2.module.digest.AbstractArrayDigester;
import org.jhove2.module.digest.CRC32Digester;
import org.jhove2.module.format.BaseFormatModule;
import org.jhove2.module.format.Validator;
import org.jhove2.persist.FormatModuleAccessor;
import com.sleepycat.persist.model.Persistent;
/**
 * JHOVE2 Zip module.
 *
 * <p>Parsing relies on the native {@link java.util.zip.ZipFile} classes to
 * enumerate the archive entries. Every entry is turned into a child source
 * unit that is attached to its parent directory (explicit or implied by the
 * entry pathname) or to the Zip source itself, and is then characterized.</p>
 *
 * @author mstrong, slabrams
 */
@Persistent
public class ZipModule
    extends BaseFormatModule
    implements Validator
{
    /** Zip module version identifier. */
    public static final String VERSION = "2.0.0";

    /** Zip module release date. */
    public static final String RELEASE = "2010-09-10";

    /** Zip module rights statement. */
    public static final String RIGHTS =
        "Copyright 2010 by The Regents of the University of California, " +
        "Ithaka Harbors, Inc., and The Board of Trustees of the Leland " +
        "Stanford Junior University. " +
        "Available under the terms of the BSD license.";

    /** Module validation coverage. */
    public static final Coverage COVERAGE = Coverage.Selective;

    /** Archive extra data record signature. */
    public static final int ARCHIVE_EXTRA_DATA_RECORD_SIGNATURE = 0x08064b50;

    /** Central directory structure signature. */
    public static final int CENTRAL_DIRECTORY_STRUCTURE_SIGNATURE = 0x02014b50;

    /** Digital signature header signature. */
    public static final int DIGITAL_SIGNATURE_HEADER_SIGNATURE = 0x05054b50;

    /** End of central directory signature. */
    public static final int END_OF_CENTRAL_DIRECTORY_SIGNATURE = 0x06054b50;

    /** Local file header signature. */
    public static final int LOCAL_FILE_HEADER_SIGNATURE = 0x04034b50;

    /** Zip64 end of central directory signature. */
    public static final int ZIP64_END_OF_CENTRAL_DIRECTORY_SIGNATURE = 0x06064b50;

    /** Zip64 end of central directory locator signature. */
    public static final int ZIP64_END_OF_CENTRAL_DIRECTORY_LOCATOR_SIGNATURE = 0x07064b50;

    /**
     * Validation status. Initialized to {@link Validity#Undetermined} by the
     * constructor; {@link #parse(JHOVE2, Source, Input)} does not update it.
     */
    protected Validity isValid;

    /**
     * Instantiate a new <code>ZipModule</code>.
     *
     * @param format
     *            Zip format
     * @param formatModuleAccessor
     *            FormatModuleAccessor to manage access to Format Profiles
     */
    public ZipModule(Format format,
                     FormatModuleAccessor formatModuleAccessor) {
        super(VERSION, RELEASE, RIGHTS, format, formatModuleAccessor);
        this.isValid = Validity.Undetermined;
    }

    /**
     * Instantiate a new <code>ZipModule</code> with a <code>null</code>
     * format and a <code>null</code> format module accessor.
     */
    public ZipModule(){
        this(null,null);
    }

    /**
     * Parse a Zip source unit.
     *
     * <p>Zip entries (files and directories) are not necessarily in
     * hierarchical order. Also, directories may be implicit, that is,
     * referred to in the pathnames of files or directories but not explicitly
     * present in the form of a directory entry. Since all files and
     * directories need to be associated with the correct parent directory in
     * order for aggregate characterization to work properly, there are three
     * stages of processing:</p>
     * <ol>
     * <li>Identify all explicit directory entries, creating Directory sources
     *     and putting them into a map keyed to the directory pathname.</li>
     * <li>Identify all implicit directories (by extracting directories from
     *     pathnames and checking to see if they are not already in the map),
     *     creating Directory sources and putting them into the map. Also
     *     attach every entry to its parent and characterize any top-level
     *     file entries (children of the Zip file) that are found.</li>
     * <li>Directly characterize all top-level directories, that is, those
     *     whose parent is the Zip file. This will implicitly characterize
     *     all child files and directories.</li>
     * </ol>
     *
     * @param jhove2
     *            JHOVE2 framework object
     * @param source
     *            Zip source unit
     * @param input
     *            Zip source input
     * @return 0
     * @throws EOFException
     *             If End-of-File is reached reading the source unit
     * @throws IOException
     *             If an I/O exception is raised reading the source unit
     * @throws JHOVE2Exception
     *             If the JHOVE2 source factory is not available, or if
     *             raised while creating or characterizing a child source
     * @see org.jhove2.module.format.FormatModule#parse(org.jhove2.core.JHOVE2,
     *      org.jhove2.core.source.Source, org.jhove2.core.io.Input)
     */
    @Override
    public long parse(JHOVE2 jhove2, Source source, Input input)
        throws EOFException, IOException, JHOVE2Exception
    {
        long consumed = 0L;
        /* this.isValid = Validity.True; */
        input.setByteOrder(ByteOrder.LITTLE_ENDIAN);

        /* Use the native Java Zip classes to retrieve the (possibly)
         * compressed entries as individual source units.
         */
        File file = source.getFile();
        ZipFile zip = new ZipFile(file, ZipFile.OPEN_READ);
        try {
            /* Done inside the try block so that the archive is closed even
             * if this call fails.
             */
            source.setIsAggregate(true);

            Map<String, Source> map = new TreeMap<String, Source>();
            SourceFactory factory = jhove2.getSourceFactory();
            if (factory == null){
                throw new JHOVE2Exception("JHOVE2 SourceFactory is null");
            }

            /* (1) Identify all directories that are explicit entries. */
            Enumeration<? extends ZipEntry> en = zip.entries();
            while (en.hasMoreElements()) {
                ZipEntry entry = en.nextElement();
                if (!entry.isDirectory()) {
                    continue;
                }
                /* The directory pathname without its trailing separator is
                 * both the name of the Directory source and its map key.
                 */
                String name = stripTrailingSeparator(entry.getName());
                Source src = factory.getDirectorySource(jhove2, name, false);
                if (src != null) {
                    src = src.addExtraProperties(entryProperties(name, entry));
                    map.put(name, src);
                }
            }

            /* (2) Identify all directories that are not explicit entries but
             * are implied by file and directory pathnames. Also, create
             * File source units for file entries, and if they are top-
             * level entries, that is, children of the Zip file,
             * characterize them. Lower level file entries (and directories)
             * will be characterized later on as part of the recursive
             * processing of top-level directories.
             *
             * NOTE(review): the parent lookups below return null when the
             * parent pathname was never registered in the map (for example
             * an entry name that starts with a separator, which
             * checkForImpliedDirectories does not register); such an entry
             * currently fails with a NullPointerException.
             */
            en = zip.entries();
            while (en.hasMoreElements()) {
                ZipEntry entry = en.nextElement();
                boolean isDirectory = entry.isDirectory();
                String path = entry.getName();
                if (isDirectory) {
                    path = stripTrailingSeparator(path);
                }

                /* Check to make sure all directories implied in the
                 * pathname are also in the map.
                 */
                checkForImpliedDirectories(jhove2, path, map, source,
                                           factory);

                int sep = lastSeparatorIndex(path);
                if (isDirectory) {
                    Source src = map.get(path);
                    if (sep > -1) {
                        /* Directory is a child of a Directory retrievable
                         * from the map.
                         */
                        Source parent = map.get(path.substring(0, sep));
                        parent.addChildSource(src);
                    }
                    else {
                        /* Directory is a child of the Zip file. */
                        source.addChildSource(src);
                    }
                }
                else { /* Entry is a file. */
                    /* Recover the filename from the pathname. */
                    String fileName =
                        (sep > -1) ? path.substring(sep + 1) : path;

                    /* The entry content is handed to the source factory as
                     * a stream. NOTE(review): the stream is not closed
                     * here; all streams obtained from the ZipFile are
                     * closed when the ZipFile itself is closed below.
                     */
                    InputStream stream = zip.getInputStream(entry);
                    ZipEntryProperties properties =
                        entryProperties(fileName, entry);
                    Source src =
                        factory.getSource(jhove2, stream, fileName, properties);
                    if (src != null) {
                        if (sep > -1) {
                            /* File is a child of a Directory retrievable
                             * from the map.
                             */
                            Source parent = map.get(path.substring(0, sep));
                            parent.addChildSource(src);
                        }
                        else {
                            /* File is a child of the Zip file and can be
                             * characterized now. All other files will be
                             * characterized later as part of the recursive
                             * characterization of top-level directories.
                             */
                            src = source.addChildSource(src);
                            characterizeAndClose(jhove2, src);
                        }
                    }
                }
            }

            /* (3) Characterize all top-level directories, implicitly
             * characterizing all lower-level files and directories.
             */
            List<Source> children = source.getChildSources();
            for (Source child : children) {
                if (child instanceof DirectorySource) {
                    characterizeAndClose(jhove2, child);
                }
            }
        }
        finally {
            zip.close();
        }

        return consumed;
    }

    /** Validate the Zip file. Returns the current validation status without
     * performing any additional checks.
     * @param jhove2 JHOVE2 framework object
     * @param source Zip file source unit
     * @param input  Zip file source input
     * @return Validation status
     * @see org.jhove2.module.format.Validator#validate(org.jhove2.core.JHOVE2, org.jhove2.core.source.Source, org.jhove2.core.io.Input)
     */
    @Override
    public Validity validate(JHOVE2 jhove2, Source source, Input input)
        throws JHOVE2Exception
    {
        return this.isValid();
    }

    /** Get validation coverage.
     * @return Validation coverage
     * @see org.jhove2.module.format.Validator#getCoverage()
     */
    @Override
    public Coverage getCoverage() {
        return COVERAGE;
    }

//  /** Get Zip file entries.
//   * @return Zip file entries
//   */
//  @ReportableProperty(order=1, value="Zip file entries")
//  public List<ZipFileEntry> getZipFileEntries() {
//      return this.entries;
//  }

    /** Get validity.
     * @return Validity
     * @see org.jhove2.module.format.Validator#isValid()
     */
    @Override
    public Validity isValid()
    {
        return this.isValid;
    }

    /** Check for directories implied by the pathnames for file and directory
     * entries. Each directory prefix of the pathname that is not already in
     * the map is created as a new Directory source, added to the map, and
     * added as a child of the directory that precedes it in the pathname
     * (or of the Zip file itself for the first path component).
     * @param jhove2  JHOVE2 framework object
     * @param name    File or directory entry pathname
     * @param map     Map of directories, keyed by directory pathname
     * @param source  Zip file source unit
     * @param factory Source factory
     * @throws JHOVE2Exception
     * @throws IOException
     */
    protected void checkForImpliedDirectories(JHOVE2 jhove2, String name,
                                              Map<String, Source> map,
                                              Source source,
                                              SourceFactory factory)
        throws IOException, JHOVE2Exception
    {
        Source parent = source;
        int from = 0;
        while (true) {
            /* Although the path separator always should be a forward slash
             * (/), in practice a backward slash (\) may be found.
             */
            int in = name.indexOf('/', from);
            if (in < 0) {
                in = name.indexOf('\\', from);
            }
            if (in <= 0) {
                /* No further separator (or a separator at the very start
                 * of the pathname): nothing more to register.
                 */
                break;
            }
            String key = name.substring(0, in);
            /* If the directory is not in the map, add it. */
            Source dir = map.get(key);
            if (dir == null) {
                dir = factory.getDirectorySource(jhove2, key, false);
                dir = parent.addChildSource(dir);
                map.put(key, dir);
            }
            /* Always descend, even when the directory was already known;
             * otherwise a missing sub-directory of an existing directory
             * would be attached to the wrong parent.
             */
            parent = dir;
            from = in + 1;
        }
    }

    /** Find the last path separator in a pathname. Although the separator
     * always should be a forward slash (/), in practice a backward slash (\)
     * may be found; the backward slash is only considered when the pathname
     * contains no forward slash.
     * @param path Entry pathname
     * @return Index of the last separator, or -1 if there is none
     */
    private static int lastSeparatorIndex(String path) {
        int index = path.lastIndexOf('/');
        if (index < 0) {
            index = path.lastIndexOf('\\');
        }
        return index;
    }

    /** Remove a single trailing path separator (forward or backward slash)
     * from a pathname, if present.
     * @param path Entry pathname
     * @return Pathname without a trailing separator
     */
    private static String stripTrailingSeparator(String path) {
        int last = path.length() - 1;
        if (last >= 0) {
            char ch = path.charAt(last);
            if (ch == '/' || ch == '\\') {
                return path.substring(0, last);
            }
        }
        return path;
    }

    /** Collect the entry-specific properties (compressed size, CRC-32
     * digest, comment, and modification time) of a Zip entry.
     * @param name  Name to report for the entry
     * @param entry Zip entry
     * @return Entry properties
     */
    private static ZipEntryProperties entryProperties(String name,
                                                      ZipEntry entry) {
        long crc = entry.getCrc();
        Digest crc32 = new Digest(AbstractArrayDigester.toHexString(crc),
                                  CRC32Digester.ALGORITHM);
        return new ZipEntryProperties(name, entry.getCompressedSize(), crc32,
                                      entry.getComment(),
                                      new Date(entry.getTime()));
    }

    /** Characterize a source unit, making sure to close its Input after
     * characterization is completed.
     * @param jhove2 JHOVE2 framework object
     * @param src    Source unit to characterize
     * @throws IOException
     * @throws JHOVE2Exception
     */
    private static void characterizeAndClose(JHOVE2 jhove2, Source src)
        throws IOException, JHOVE2Exception
    {
        Input inpt = src.getInput(jhove2);
        try {
            jhove2.characterize(src, inpt);
        }
        finally {
            if (inpt != null) {
                inpt.close();
            }
        }
    }
}