/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * */ package com.mucommander.commons.file.archive.zip.provider; import com.mucommander.commons.file.AbstractFile; import com.mucommander.commons.file.UnsupportedFileOperationException; import com.mucommander.commons.io.*; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.*; import java.util.Hashtable; import java.util.Iterator; import java.util.Vector; import java.util.zip.Deflater; import java.util.zip.Inflater; import java.util.zip.InflaterInputStream; import java.util.zip.ZipException; /** * This class is a replacement for <code>java.util.ZipFile</code> with some extra functionalities: * <ul> * <li>Ability to add or remove entries 'on-the-fly', i.e. without rewriting the whole archive. * <li>Advanced encoding support for filenames and comments. UTF-8 is used for parsing entries that explicitely declare * using UTF-8 (as per Zip specs). For entries that do not use UTF-8, the encoding is auto-detected (best effort). * Alternatively, the encoding used for parsing entries can be specified if it is known in advance. For new entries * added with {@link #addEntry(ZipEntry)}, UTF-8 is always used and declared as such in the Zip headers. 
* <li>Loads the internal/external file attributes and extra fields instead of ignoring them * </ul> * * <p>This class doesn't extend <code>java.util.zip.ZipFile</code> as it would have to reimplement all methods anyway. * Like <code>java.util.ZipFile</code>, it supports compressed (DEFLATED) and uncompressed (STORED) entries.</p> * * <p>Random read access is required to instantiate a <code>ZipFile</code> and retrieve its entries. Furthermore, random * write access is required for methods that modify the Zip file.</p> * * <p>The method signatures mimic the ones of <code>java.util.zip.ZipFile</code> with a few exceptions: * <ul> * <li>There is no <code>getName</code> method.</li> * <li>There is no <code>close</code> method: underlying input and output streams are opened and closed automatically * as they are needed.</li> * <li><code>entries</code> has been renamed to {@link #getEntries()} and returns an <code>Iterator</code> instead of * an <code>Enumeration</code>.</li> * <li><code>size</code> has been renamed to {@link #getNbEntries()}.</li> * </ul> * </p> * * <p>--------------------------------------------------------------------------------------------------------------<br> * <br> * This class is based off the <code>org.apache.tools.zip</code> package of the <i>Apache Ant</i> project. The Ant * code has been modified under the terms of the Apache License which you can find in the bundled muCommander license * file. 
 It was forked at version 1.7.0 of Ant.</p>
 *
 * @author Apache Ant, Maxence Bernard
 */
public class ZipFile implements ZipConstants {

    private static final Logger LOGGER = LoggerFactory.getLogger(ZipFile.class);

    /** The underlying archive file */
    private AbstractFile file;

    /** The currently opened RandomAccessInputStream to the zip file (may be null) */
    private RandomAccessInputStream rais;

    /** The currently opened RandomAccessOutputStream to the zip file (may be null) */
    private RandomAccessOutputStream raos;

    /** Contains ZipEntry instances corresponding to the archive's entries, in the order they were found in the archive. */
    private Vector<ZipEntry> entries = new Vector<ZipEntry>();

    /** Maps entry paths to corresponding ZipEntry instances */
    private Hashtable<String, ZipEntry> nameMap = new Hashtable<String, ZipEntry>();

    /** Global zip file comment */
    private String comment;

    /**
     * The default encoding to use for parsing filenames and comments. This value is only used for Zip entries that do
     * not have the UTF-8 flag set. If not specified (null), then automatic encoding detection is used (default).
     */
    private String defaultEncoding = null;

    /** Holds byte buffer instances used to convert shorts and longs, avoids creating lots of small arrays */
    private ZipBuffer zipBuffer = new ZipBuffer();

    /**
     * Opens the given Zip file and parses information about the entries it contains.
     *
     * <p>The given {@link AbstractFile} must have random read access. If not, an <code>IOException</code> will be
     * thrown.</p>
     *
     * @param f the archive file
     * @throws IOException if an error occurred while reading the Zip file.
     * @throws ZipException if this file is not a valid Zip file
     * @throws UnsupportedFileOperationException if a required operation is not supported by the underlying filesystem.
 */
public ZipFile(AbstractFile f) throws IOException, ZipException, UnsupportedFileOperationException {
    this.file = f;

    try {
        openRead();
        parseCentralDirectory();
    }
    finally {
        // Streams are opened and closed on demand by each operation, so release the read stream now
        closeRead();
    }
}

/**
 * Opens the zip file for random read access.
 *
 * @throws IOException if an error occurred while opening the zip file for random read access.
 * @throws UnsupportedFileOperationException if a required operation is not supported by the underlying filesystem.
 */
private void openRead() throws IOException, UnsupportedFileOperationException {
    if(rais!=null) {
        // Should not normally happen: it indicates a previous operation failed to release the stream
        LOGGER.info("Warning: an existing RandomAccessInputStream was found, closing it now");
        rais.close();
    }

    rais = file.getRandomAccessInputStream();
}

/**
 * Closes the current RandomAccessInputStream to the zip file.
 *
 * @throws IOException if an error occurred
 */
private void closeRead() throws IOException {
    if(rais!=null) {
        try {
            rais.close();
        }
        finally {
            // Always reset the reference, even if close() threw, so subsequent operations can proceed
            rais = null;
        }
    }
}

/**
 * Opens the zip file for random write access.
 *
 * @throws IOException if an error occurred while opening the zip file for random write access.
 * @throws UnsupportedFileOperationException if a required operation is not supported by the underlying filesystem.
 */
private void openWrite() throws IOException, UnsupportedFileOperationException {
    if(raos!=null) {
        // Should not normally happen: it indicates a previous operation failed to release the stream
        LOGGER.info("Warning: an existing RandomAccessOutputStream was found, closing it now");
        raos.close();
    }

    // Create a buffered output stream to improve write performance, as headers are written by small chunks
    raos = new BufferedRandomOutputStream(file.getRandomAccessOutputStream(), WRITE_BUFFER_SIZE);
}

/**
 * Closes the current RandomAccessOutputStream to the zip file.
 *
 * @throws IOException if an error occurred
 */
private void closeWrite() throws IOException {
    if(raos!=null) {
        try {
            raos.close();
        }
        finally {
            // Always reset the reference, even if close() threw, so subsequent operations can proceed
            raos = null;
        }
    }
}

/**
 * Returns the default encoding to use for parsing filenames and comments.
 This value is not used for Zip entries
 * that explicitly declare using UTF-8 (in the general purpose bit flag).
 *
 * <p>By default, this method returns <code>null</code> to indicate that automatic encoding detection is used.
 * Although it is not 100% accurate, encoding detection is the preferred approach, unless the encoding is known
 * in advance which is rather uncommon.</p>
 *
 * <p>Note that this value only affects entries <i>parsing</i>. Written entries are systematically encoded in
 * <code>UTF-8</code> and declared as such in the general purpose bit flag so that proper zip unpackers know what
 * encoding to expect.</p>
 *
 * @return the default encoding to use for parsing filenames and comments
 */
public String getDefaultEncoding() {
    return defaultEncoding;
}

/**
 * Sets the default encoding to use for parsing filenames and comments. This value is not used for Zip entries
 * that explicitly declare using UTF-8 (in the general purpose bit flag).
 *
 * <p>By default, the encoding is <code>null</code> to indicate that automatic encoding detection is used.
 * Although it is not 100% accurate, encoding detection is the preferred approach, unless the encoding is known
 * in advance which is rather uncommon.</p>
 *
 * <p>Note that this value only affects entries <i>parsing</i>. Written entries are systematically encoded in
 * <code>UTF-8</code> and declared as such in the general purpose bit flag so that proper zip unpackers know what
 * encoding to expect.</p>
 *
 * @param defaultEncoding the default encoding to use for parsing filenames and comments
 */
public void setDefaultEncoding(String defaultEncoding) {
    this.defaultEncoding = defaultEncoding;
}

/**
 * Returns all entries as an <code>Iterator</code> of {@link ZipEntry} instances.
 *
 * @return all entries as an <code>Iterator</code> of ZipEntry instances.
 */
public Iterator<ZipEntry> getEntries() {
    return entries.iterator();
}

/**
 * Returns the number of entries contained by this Zip file.
* * @return the number of entries contained by this Zip file */ public int getNbEntries() { return entries.size(); } /** * Returns a named entry or <code>null</code> if no entry by that name exists. * * @param name name of the entry. * @return the ZipEntry corresponding to the given name or <code>null</code> if not present. */ public ZipEntry getEntry(String name) { return nameMap.get(name); } /** * Returns an InputStream for reading the contents of the given entry. * * @param ze the entry to get the stream for. * @return a stream to read the entry from. * @throws IOException if unable to create an input stream from the zipentry * @throws ZipException if the zipentry has an unsupported compression method * @throws UnsupportedFileOperationException if a required operation is not supported by the underlying filesystem. */ public InputStream getInputStream(ZipEntry ze) throws IOException, ZipException, UnsupportedFileOperationException { ZipEntryInfo entryInfo = ze.getEntryInfo(); if (entryInfo == null) throw new ZipException("Unknown entry: "+ze.getName()); openRead(); RandomAccessInputStream entryIn = this.rais; // If data offset is -1 (not calculated yet), calculate it now if (entryInfo.dataOffset == -1) calculateDataOffset(entryInfo); this.rais = null; long start = entryInfo.dataOffset; BoundedInputStream bis = new BoundedInputStream(entryIn, start, ze.getCompressedSize()); switch (ze.getMethod()) { case ZipConstants.STORED: return bis; case ZipConstants.DEFLATED: bis.addDummy(); return new InflaterInputStream(bis, new Inflater(true)); default: throw new ZipException("Found unsupported compression method " + ze.getMethod()); } } /** * Deletes the given entry from this zip file. For performance reasons, this method removes the central file * header and zero out the local file header and data so that the entry cannot be retrieved, but it does * not reclaim the freed space and produces fragmentation. 
 In other words, the resulting zip file will not be
 * smaller after the entry has been deleted and will contain an area of unused space. The {@link #defragment} method
 * can be called to reclaim the free space.
 *
 * <p>There is one case where this method reclaims the free space: when the specified entry is the last one in the
 * zip file. In this case, the resulting zip file will be smaller.</p>
 *
 * <p>Note that 'fragmented' zip files are perfectly valid zip files, any zip parser should be able to cope with
 * such files.</p>
 *
 * <p>The underlying {@link AbstractFile} must have random write access. If not, an <code>IOException</code> will be
 * thrown.</p>
 *
 * @param ze the ZipEntry to delete
 * @throws IOException if an I/O error occurred
 * @throws ZipException if the specified ZipEntry cannot be found in this zip file
 * @throws UnsupportedFileOperationException if a required operation is not supported by the underlying filesystem.
 */
public void deleteEntry(ZipEntry ze) throws IOException, ZipException, UnsupportedFileOperationException {
    openRead();
    openWrite();

    try {
        ZipEntryInfo entryInfo = ze.getEntryInfo();
        if (entryInfo == null) {
            // Fail silently if the entry is a directory as specific directory entries do not always exist
            // in zip files.
            if(ze.isDirectory())
                return;

            throw new ZipException("Unknown entry: "+ze.getName());
        }

        // Strip out central file header of deleted entry
        int entryIndex = entries.indexOf(ze);
        int nbEntries = entries.size();
        long cdStartOffset;
        long cdEndOffset;

        if(nbEntries==1) {
            // Special case if the deleted entry is the only one, the zip file will become empty.
            // Note: empty zip files must have the central directory start at offset 0.
            cdStartOffset = 0;
            cdEndOffset = 0;
            raos.seek(0);
        }
        else {
            cdStartOffset = entries.elementAt(0).getEntryInfo().centralHeaderOffset;
            long shift;

            if(entryIndex==nbEntries-1) {
                // Lucky case! If the entry to delete is the last one, we can easily/quickly reclaim the space used
                // by this entry by moving the central directory (minus the last header corresponding to the deleted
                // entry) to where the entry's local header started.

                // The entry before the deleted one will become the last one
                ZipEntryInfo lastEntryInfo = entries.elementAt(nbEntries-2).getEntryInfo();

                // Destination offset
                long newCdStartOffset = entryInfo.headerOffset;

                cdEndOffset = lastEntryInfo.centralHeaderOffset + lastEntryInfo.centralHeaderLen;
                long cdLength = cdEndOffset-cdStartOffset;

                // Copy the central directory (minus the deleted entry's header, which lies past cdEndOffset)
                StreamUtils.copyChunk(rais, raos, cdStartOffset, newCdStartOffset, cdLength);

                // Update central directory header offsets
                shift = cdStartOffset-newCdStartOffset;
                for(int i=0; i<nbEntries-1; i++)
                    entries.elementAt(i).getEntryInfo().centralHeaderOffset -= shift;

                cdStartOffset = newCdStartOffset;
                cdEndOffset = newCdStartOffset + cdLength;
            }
            else {
                // Most frequent case: the entry to delete is neither the only one nor the last one.
                // In this case, we don't reclaim the space (would be too slow) but simply zero out
                // the local file header and data (so that the data entry can't be retrieved)

                // If data offset is -1 (not calculated yet), calculate it now
                if (entryInfo.dataOffset == -1)
                    calculateDataOffset(entryInfo);

                // Zero out all bytes of the local file header+data for the deleted entry
                // Note: the data descriptor (if any) is not erased, this would require some extra check and it is
                // not really necessary, as the information it contains is not sensitive
                raos.seek(entryInfo.headerOffset);
                StreamUtils.fillWithConstant(raos, (byte)0, entryInfo.dataOffset-entryInfo.headerOffset + ze.getCompressedSize(), WRITE_BUFFER_SIZE);

                // Update the central directory:
                // - do not touch the file headers that are located before the deleted entry
                // - move the file headers that are located after the deleted entry to where the deleted entry's
                //   header was (this will remove the deleted entry's file header)
                ZipEntryInfo lastEntryInfo = entries.elementAt(nbEntries-1).getEntryInfo();
                long startOffset = entries.elementAt(entryIndex+1).getEntryInfo().centralHeaderOffset;
                cdEndOffset = lastEntryInfo.centralHeaderOffset + lastEntryInfo.centralHeaderLen;

                StreamUtils.copyChunk(rais, raos, startOffset, entryInfo.centralHeaderOffset, cdEndOffset-startOffset);

                // Update central directory header offsets for files located after the deleted entry as their
                // offset has changed
                shift = entryInfo.centralHeaderLen;
                for(int i=entryIndex+1; i<nbEntries; i++)
                    entries.elementAt(i).getEntryInfo().centralHeaderOffset -= shift;

                cdEndOffset -= shift;
            }
        }

        // Write the central directory end section
        ZipOutputStream.writeCentralDirectoryEnd(raos, nbEntries-1, cdEndOffset - cdStartOffset, cdStartOffset, comment, UTF_8, zipBuffer);

        // Truncate the zip file to reclaim the trailing unused space
        raos.setLength(raos.getOffset());

        // All good, remove the deleted entry from the lists
        entries.removeElementAt(entryIndex);
        nameMap.remove(ze.getName());
    }
    finally {
        // Best effort: release both streams even if one of them fails to close
        try { closeRead(); }
        catch(IOException e) {}

        try { closeWrite(); }
        catch(IOException e) {}
    }
}

/**
 * Appends the given entry to the end of this zip file and returns an <code>OutputStream</code> that allows to write
 * the contents of the entry. The returned <code>OutputStream</code> must always be closed for the zip file to be
 * properly modified. Not doing so will leave this zip file in an inconsistent, corrupted state.
 *
 * <p>The underlying {@link AbstractFile} must have random write access. If not, an <code>IOException</code> will be
 * thrown.</p>
 *
 * @param entry the entry to add to this zip file
 * @return an OutputStream to write the contents of the entry, or <code>null</code> if the entry is a directory
 * @throws IOException if an I/O error occurred
 * @throws UnsupportedFileOperationException if a required operation is not supported by the underlying filesystem
 * or is not implemented.
 */
public OutputStream addEntry(final ZipEntry entry) throws IOException, UnsupportedFileOperationException {
    try {
        // Open the zip file for random read and write access
        openRead();
        openWrite();

        // Write the new entry's local file header right before the central directory start
        positionAtCentralDirectory();
        long centralDirectoryStart = rais.getOffset();
        raos.seek(centralDirectoryStart);

        final ZipEntryInfo entryInfo = new ZipEntryInfo();
        entryInfo.encoding = UTF_8;     // Always use UTF-8 for new entries
        entryInfo.headerOffset = centralDirectoryStart;
        entryInfo.dataOffset = entryInfo.headerOffset + ZipOutputStream.writeLocalFileHeader(entry, raos, entryInfo.encoding, false, zipBuffer);

        // Add the new entry to the internal lists
        entry.setEntryInfo(entryInfo);
        entries.add(entry);
        nameMap.put(entry.getName(), entry);

        // Create the ZipEntryOutputStream to write the entry's contents
        // Use BufferPool to avoid excessive memory allocation and garbage collection.
        final byte[] deflaterBuf = BufferPool.getByteArray(DEFAULT_DEFLATER_BUFFER_SIZE);

        ZipEntryOutputStream zeos = new DeflatedOutputStream(raos, new Deflater(DEFAULT_DEFLATER_COMPRESSION, true), deflaterBuf) {
            // Post-data file info and central directory get written when the stream is closed
            @Override
            public void close() throws IOException {
                // Write data info in the local file header
                ZipOutputStream.finalizeEntryData(entry, this, raos, false, zipBuffer);

                // Write the central directory that was squashed by the new entry (at least partially)
                ZipEntry tempZe;
                ZipEntryInfo tempEntryInfo;
                int nbEntries = entries.size();
                long cdLength = 0;                  // Length of central directory
                long cdOffset = raos.getOffset();   // Offset of central directory

                for(int i=0; i<nbEntries; i++) {
                    tempZe = entries.elementAt(i);
                    tempEntryInfo = tempZe.getEntryInfo();

                    // Update offset to central header
                    tempEntryInfo.centralHeaderOffset = raos.getOffset();

                    cdLength += ZipOutputStream.writeCentralFileHeader(
                            tempZe, raos,
                            tempEntryInfo.encoding,     // Preserve existing encoding so that LFH and CFH match
                            tempEntryInfo.headerOffset,
                            tempEntryInfo.hasDataDescriptor,
                            zipBuffer);

                    // Update length of central header
                    tempEntryInfo.centralHeaderLen = raos.getOffset() - tempEntryInfo.centralHeaderOffset;
                }

                ZipOutputStream.writeCentralDirectoryEnd(raos, nbEntries, cdLength, cdOffset, comment, UTF_8, zipBuffer);

                // In some rare cases, the resulting zip file may be smaller.
                // Truncate the file to ensure that it ends at the central directory end position.
                raos.setLength(raos.getOffset());

                // Release the buffer for reuse
                BufferPool.releaseByteArray(deflaterBuf);

                super.close();
                closeWrite();
            }
        };

        // Directory entries cannot contain data, close the stream now and return null
        if(entry.isDirectory()) {
            zeos.close();
            return null;
        }

        return zeos;
    }
    finally {
        closeRead();
        // Note: RandomAccessOutputStream is closed by ZipEntryOutputStream#close()
    }
}

/**
 * Updates the date and permissions of the entry designated by the given ZipEntry object. The specified entry must
 * exist in this Zip file.
 *
 * <p>The underlying {@link AbstractFile} must have random write access. If not, an <code>IOException</code> will be
 * thrown.</p>
 *
 * @param entry the entry to update
 * @throws IOException if an I/O error occurred
 * @throws UnsupportedFileOperationException if a required operation is not supported by the underlying filesystem.
 */
public void updateEntry(ZipEntry entry) throws IOException, UnsupportedFileOperationException {
    try {
        // Open the zip file for write
        openWrite();

        ZipEntryInfo entryInfo = entry.getEntryInfo();

        /* Local file header */

        // Update time and date (located 10 bytes into the local file header)
        raos.seek(entryInfo.headerOffset+10);
        raos.write(ZipLong.getBytes(entry.getDosTime(), zipBuffer.longBuffer));

        // Note: external attributes are not present in the local file header

        /* Central file header */

        // Update 'Version made by', platform might have changed if the Zip didn't contain Unix permissions
        raos.seek(entryInfo.centralHeaderOffset+4);
        ZipOutputStream.writeVersionMadeBy(entry, raos, zipBuffer);

        // Update time and date
        raos.seek(entryInfo.centralHeaderOffset+12);
        raos.write(ZipLong.getBytes(entry.getDosTime(), zipBuffer.longBuffer));

        // Update 'external attributes' for permissions
        raos.seek(entryInfo.centralHeaderOffset+38);
        raos.write(ZipLong.getBytes(entry.getExternalAttributes(), zipBuffer.longBuffer));
    }
    finally {
        closeWrite();
    }
}

/**
 * Removes free space fragments from this zip file, thus reducing the size of the zip file.
 If this zip file does
 * not contain any free space fragments, the zip file is not modified.
 *
 * <p>Fragmentation occurs when deleting entries with {@link #deleteEntry(ZipEntry)}. When deleting several entries,
 * this method should be called once after all entries have been deleted.</p>
 *
 * <p>The underlying {@link AbstractFile} must have random write access. If not, an <code>IOException</code> will be
 * thrown.</p>
 *
 * @throws IOException if an I/O error occurred
 * @throws UnsupportedFileOperationException if a required operation is not supported by the underlying filesystem.
 */
public void defragment() throws IOException, UnsupportedFileOperationException {
    int nbEntries = entries.size();
    if(nbEntries==0)
        return;

    try {
        openRead();
        openWrite();

        ZipEntry currentEntry, previousEntry;
        ZipEntryInfo currentEntryInfo, previousEntryInfo;
        long shift = 0;     // Non-zero iff at least one entry has been moved

        // Special case for the first entry: its only possible gap is the space before offset 0
        currentEntry = entries.elementAt(0);
        currentEntryInfo = currentEntry.getEntryInfo();

        // If data offset is -1 (not calculated yet), calculate it now
        if (currentEntryInfo.dataOffset == -1)
            calculateDataOffset(currentEntryInfo);

        if(currentEntryInfo.headerOffset>0) {
            StreamUtils.copyChunk(rais, raos, currentEntryInfo.headerOffset, 0, (currentEntryInfo.dataOffset- currentEntryInfo.headerOffset)+currentEntry.getCompressedSize());

            shift = currentEntryInfo.headerOffset;
            currentEntryInfo.headerOffset = 0;
            currentEntryInfo.dataOffset -= shift;
        }

        previousEntry = currentEntry;
        previousEntryInfo = currentEntryInfo;

        // Process all other entries. Once an entry has been moved back, the gap propagates to every
        // subsequent entry, so each of them gets copied as well.
        for(int i=1; i<nbEntries; i++) {
            currentEntry = entries.elementAt(i);
            currentEntryInfo = currentEntry.getEntryInfo();

            // If data offset is -1 (not calculated yet), calculate it now
            if (currentEntryInfo.dataOffset == -1)
                calculateDataOffset(currentEntryInfo);

            // Calculate the offset to the end of the previous entry based on its data offset and compressed size
            // and taking into account a potential data descriptor
            long previousCompressedSize = previousEntry.getCompressedSize();
            long previousEntryEnd = previousEntryInfo.dataOffset+previousCompressedSize;
            if(previousEntryInfo.hasDataDescriptor)
                previousEntryEnd += 16;

            // Tests if there is some unused space between the 2 entries
            if(previousEntryEnd < currentEntryInfo.headerOffset) {
                // NOTE(review): the copied chunk covers header+data only, not a trailing data descriptor;
                // entries that have one appear to lose it when moved — verify against the readers' expectations.
                StreamUtils.copyChunk(rais, raos, currentEntryInfo.headerOffset, previousEntryInfo.dataOffset+previousCompressedSize, (currentEntryInfo.dataOffset- currentEntryInfo.headerOffset)+currentEntry.getCompressedSize());

                shift = currentEntryInfo.headerOffset - (previousEntryInfo.dataOffset+previousCompressedSize);
                currentEntryInfo.headerOffset -= shift;
                currentEntryInfo.dataOffset -= shift;
            }

            previousEntry = currentEntry;
            previousEntryInfo = currentEntryInfo;
        }

        // Rewrite central directory with updated offsets, right after the last entry's data
        if(shift!=0) {
            long cdLength = 0;                  // Length of central directory
            long cdOffset = raos.getOffset();   // Offset of central directory

            ZipEntry ze;
            ZipEntryInfo entryInfo;
            for(int i=0; i<nbEntries; i++) {
                ze = entries.elementAt(i);
                entryInfo = ze.getEntryInfo();

                // Update offset to central directory file header
                entryInfo.centralHeaderOffset = raos.getOffset();

                // Preserve existing encoding when rewriting CFH so that it matches LFH
                cdLength += ZipOutputStream.writeCentralFileHeader(ze, raos, entryInfo.encoding, entryInfo.headerOffset, entryInfo.hasDataDescriptor, zipBuffer);

                // Update length of central directory file header
                entryInfo.centralHeaderLen = raos.getOffset() - entryInfo.centralHeaderOffset;
            }

            ZipOutputStream.writeCentralDirectoryEnd(raos, nbEntries, cdLength, cdOffset, comment, UTF_8, zipBuffer);

            // Truncate the zip file to reclaim the trailing unused space
            raos.setLength(raos.getOffset());
        }
    }
    finally {
        // Best effort: release both streams even if one of them fails to close
        try { closeRead(); }
        catch(IOException e) {}

        try { closeWrite(); }
        catch(IOException e) {}
    }
}

/**
 * Calculates the data offset of the entry which starts at the given ZipEntryInfo.headerOffset and stores the result
 * in ZipEntryInfo.dataOffset.
 After calling this method, the RandomAccessInputStream will be positioned at the
 * beginning of the filename field.
 *
 * @param entryInfo the ZipEntryInfo object in which to store the data offset
 * @throws IOException if an unexpected I/O error occurred
 */
private void calculateDataOffset(ZipEntryInfo entryInfo) throws IOException {
    // Skip the following fixed-size fields of the local file header:
    // local file header signature     4 bytes
    // version needed to extract       2 bytes
    // general purpose bit flag        2 bytes
    // compression method              2 bytes
    // last mod file time              2 bytes
    // last mod file date              2 bytes
    // crc-32                          4 bytes
    // compressed size                 4 bytes
    // uncompressed size               4 bytes
    // Total nb of bytes to skip: 26
    long dataOffset = entryInfo.headerOffset + 26;
    rais.seek(dataOffset);

    // Advance the offset of the filename field's length (plus the filename length field: 2 bytes)
    byte[] b = new byte[2];
    rais.readFully(b);
    dataOffset += 2 + ZipShort.getValue(b);

    // Advance the offset of the extra field's length (plus the extra field length field: 2 bytes)
    rais.readFully(b);
    dataOffset += 2 + ZipShort.getValue(b);

    entryInfo.dataOffset = dataOffset;
}

/** Combined length of all constant-size fields of the Central File Header */
private static final int CFH_LEN =
    /* version made by */                    2
    /* version needed to extract */        + 2
    /* general purpose bit flag */         + 2
    /* compression method */               + 2
    /* last mod file time */               + 2
    /* last mod file date */               + 2
    /* crc-32 */                           + 4
    /* compressed size */                  + 4
    /* uncompressed size */                + 4
    /* filename length */                  + 2
    /* extra field length */               + 2
    /* file comment length */              + 2
    /* disk number start */                + 2
    /* internal file attributes */         + 2
    /* external file attributes */         + 4
    /* relative offset of local header */  + 4;

/**
 * Reads the central directory of the given archive and populates
 * the internal tables with ZipEntry instances.
 *
 * <p>The ZipEntrys will know all data that can be obtained from
 * the central directory alone, but not the data that requires the
 * local file header or additional data to be read.</p>
 *
 * @throws IOException if an I/O error occurred
 * @throws ZipException if this file is not a valid Zip file
 */
private void parseCentralDirectory() throws IOException, ZipException {
    positionAtCentralDirectory();

    byte[] cfh = new byte[CFH_LEN];
    byte[] signatureBytes = new byte[4];

    rais.readFully(signatureBytes);
    long sig = ZipLong.getValue(signatureBytes);
    final long cfhSig = ZipLong.getValue(CFH_SIG);

    boolean defaultEncodingSet = defaultEncoding!=null;
    // Accumulates filename/comment bytes of all entries with an unknown encoding, for a single
    // encoding-detection pass after the whole central directory has been read
    ByteArrayOutputStream encodingAccumulator = defaultEncodingSet?null:new ByteArrayOutputStream();

    while (sig == cfhSig) {
        ZipEntryInfo entryInfo = new ZipEntryInfo();

        // Set Central directory file header offset
        entryInfo.centralHeaderOffset = rais.getOffset() - 4;   // 4 for the header signature

        rais.readFully(cfh);

        ZipEntry ze = new ZipEntry();

        int versionMadeBy = ZipShort.getValue(cfh, 0);
        // The upper byte of 'version made by' holds the host platform
        ze.setPlatform((versionMadeBy >> 8) & 0x0F);

        // 'version needed to extract' (offset 2) is skipped

        int gp = ZipShort.getValue(cfh, 4);     // General purpose bit flag
        // Tests if bit 11 is set, signaling UTF-8 is used for filename and comment
        boolean isUTF8 = (gp&0x800)!=0;
        if(isUTF8) {
            entryInfo.encoding = UTF_8;
            LOGGER.info("Entry declared as UTF-8");
        }
        else if(defaultEncodingSet) {
            entryInfo.encoding = defaultEncoding;
            LOGGER.info("Using default encoding: "+defaultEncoding);
        }
        else {
            // Encoding left null: it will be detected after all entries have been read
        }

        // Bit 3 signals the presence of a data descriptor after the entry's data
        entryInfo.hasDataDescriptor = (gp&8)!=0;

        int method = ZipShort.getValue(cfh, 6);
        // Note: ZipEntry#setMethod(int) will throw a java.lang.InternalError ("invalid compression method") if the
        // method is different from DEFLATED or STORED (happens with IMPLODED for example).
        // Thus we check the method ourselves to fail gracefully.
        if(method!=DEFLATED && method!=STORED)
            throw new ZipException("Unsupported compression method");

        ze.setMethod(method);
        ze.setDosTime(ZipLong.getValue(cfh, 8));
        ze.setCrc(ZipLong.getValue(cfh, 12));
        ze.setCompressedSize(ZipLong.getValue(cfh, 16));
        ze.setSize(ZipLong.getValue(cfh, 20));

        int fileNameLen = ZipShort.getValue(cfh, 24);
        int extraLen = ZipShort.getValue(cfh, 26);
        int commentLen = ZipShort.getValue(cfh, 28);

        // 'disk number start' (offset 30) is skipped

        ze.setInternalAttributes(ZipShort.getValue(cfh, 32));
        ze.setExternalAttributes(ZipLong.getValue(cfh, 34));

        // Read filename bytes
        byte[] filename = new byte[fileNameLen];
        rais.readFully(filename);

        if(entryInfo.encoding!=null) {
            // The encoding is known already, set the String now
            setFilename(ze, getString(filename, entryInfo.encoding));
        }
        else {
            // Keep the raw filename bytes, the String will be created after encoding detection
            entryInfo.filename = filename;
            // Accumulate those unidentified bytes for encoding detection
            feedEncodingAccumulator(encodingAccumulator, filename);
        }

        // Offset to local file header; data offset will be calculated lazily, see calculateDataOffset
        entryInfo.headerOffset = ZipLong.getValue(cfh, 38);

        // Read and set extra bytes
        byte extra[] = new byte[extraLen];
        rais.readFully(extra);
        ze.setExtra(extra);

        // Read comment bytes
        byte[] comment = new byte[commentLen];
        rais.readFully(comment);

        if(entryInfo.encoding!=null) {
            // The encoding is known already, set the String now
            ze.setComment(getString(comment, entryInfo.encoding));
        }
        else {
            // Keep the raw comment bytes, the String will be created after encoding detection
            entryInfo.comment = comment;
            // Accumulate those unidentified bytes for encoding detection
            feedEncodingAccumulator(encodingAccumulator, comment);
        }

        // 46 is the fixed part of a central file header (signature included)
        entryInfo.centralHeaderLen = 46 + fileNameLen + extraLen + commentLen;

        // Add the new entry to the internal lists.
        // NOTE(review): when the encoding is not known yet, the entry's name has not been set at this point,
        // and nameMap is not re-keyed after the detection pass below — verify that getEntry(String) works
        // for entries whose encoding was auto-detected.
        ze.setEntryInfo(entryInfo);
        entries.add(ze);
        nameMap.put(ze.getName(), ze);

        // Swallow the next header's signature
        rais.readFully(signatureBytes);
        sig = ZipLong.getValue(signatureBytes);
    }

    if(encodingAccumulator!=null && encodingAccumulator.size()>0) {
        int nbEntries = entries.size();

        // Note: guessedEncoding may be null if no encoding could be detected.
        // In that case, the default system encoding will be used to create the string
        String guessedEncoding = EncodingDetector.detectEncoding(encodingAccumulator.toByteArray());
        LOGGER.info("Guessed encoding: "+guessedEncoding);

        ZipEntry entry;
        ZipEntryInfo entryInfo;
        for(int i=0; i<nbEntries; i++) {
            entry = entries.elementAt(i);
            entryInfo = entry.getEntryInfo();

            // Skip those entries for which we know the encoding already
            if(entryInfo.encoding != null)
                continue;

            entryInfo.encoding = guessedEncoding;

            setFilename(entry, getString(entryInfo.filename, guessedEncoding));
            entryInfo.filename = null;      // Raw bytes no longer needed

            entry.setComment(getString(entryInfo.comment, guessedEncoding));
            entryInfo.comment = null;       // Raw bytes no longer needed
        }
    }
}

/**
 * Sets the given filename in the ZipEntry.
 *
 * <p>This method detects filenames that use '\' as a path separator and replaces occurrences of '\' with '/'.
 * Zip specifications make it clear that '/' should always be used, even under FAT platforms, but some packers
 * (IZArc for instance) don't comply with the specs and use '\' anyway. We handle those paths only if the archive
 * was created under a FAT platform; '\' is an allowed filename character under UNIX platforms: replacing it
 * would be a bad idea.</p>
 *
 * @param ze the ZipEntry object in which to set the filename
 * @param filename the filename to set
 */
private static void setFilename(ZipEntry ze, String filename) {
    if(ze.getPlatform()==ZipEntry.PLATFORM_FAT)
        filename = filename.replace('\\', '/');

    ze.setName(filename);
}

/**
 * Feeds the given bytes to the encoding accumulator (used for encoding detection). The bytes will be ignored if
 * the accumulator has enough data already.
* * @param encodingAccumulator the ByteArrayOutputStream that holds filename and comment bytes * @param bytes the bytes to feed to the encoding accumulator * @throws IOException if an I/O occurs (should never happen) */ private static void feedEncodingAccumulator(ByteArrayOutputStream encodingAccumulator, byte bytes[]) throws IOException { if(encodingAccumulator.size() < EncodingDetector.MAX_RECOMMENDED_BYTE_SIZE) encodingAccumulator.write(bytes); // Else accumulator has enough bytes, ignore the given bytes } /** Minimum possible size for the End Of Central Directory record (no comment) */ private static final int MIN_EOCD_SIZE = /* end of central dir signature */ 4 /* number of this disk */ + 2 /* number of the disk with the */ /* start of the central directory */ + 2 /* total number of entries in */ /* the central dir on this disk */ + 2 /* total number of entries in */ /* the central dir */ + 2 /* size of the central directory */ + 4 /* offset of start of central */ /* directory with respect to */ /* the starting disk number */ + 4 /* zipfile comment length */ + 2 /* zipfile comment */ + 0; /** Maximum possible size for the End Of Central Directory record (max comment size: 65535) */ private static final int MAX_EOCD_SIZE = /* end of central dir signature */ 4 /* number of this disk */ + 2 /* number of the disk with the */ /* start of the central directory */ + 2 /* total number of entries in */ /* the central dir on this disk */ + 2 /* total number of entries in */ /* the central dir */ + 2 /* size of the central directory */ + 4 /* offset of start of central */ /* directory with respect to */ /* the starting disk number */ + 4 /* zipfile comment length */ + 2 /* zipfile comment */ + 65535; private static final int CFD_LOCATOR_OFFSET = /* end of central dir signature */ 4 /* number of this disk */ + 2 /* number of the disk with the */ /* start of the central directory */ + 2 /* total number of entries in */ /* the central dir on this disk */ + 2 /* total number 
of entries in */ /* the central dir */ + 2 /* size of the central directory */ + 4; /** * Searches for the end of central dir record, parses * it and positions the stream at the first central directory * record. * * @throws IOException if an I/O error occurs * @throws ZipException if the end of central directory signature could not be found. This can be interpreted as the * underlying file not being a Zip file */ private void positionAtCentralDirectory() throws IOException, ZipException { long length = rais.getLength(); if(length<MIN_EOCD_SIZE) throw new ZipException("Invalid Zip file (too small)"); // Use a constant buffer size to always reuse the same instance byte[] buf = BufferPool.getByteArray(MAX_EOCD_SIZE); try { // Actual buffer length int bufLen = (int)Math.min(length, MAX_EOCD_SIZE); // Read the maximum size the EOCD can take. Much more effective than seeking backwards like we used to do. rais.seek(length-bufLen); StreamUtils.readFully(rais, buf, 0, bufLen); // Look for the EOCD signature by starting at the end and moving backwards boolean signatureFound = false; int off = bufLen - MIN_EOCD_SIZE; while (off>=0) { if (buf[off] == EOCD_SIG[0]) { if (buf[off+1] == EOCD_SIG[1]) { if (buf[off+2] == EOCD_SIG[2]) { if (buf[off+3] == EOCD_SIG[3]) { signatureFound = true; break; } } } } off--; } if (!signatureFound) { throw new ZipException("Invalid Zip stream (EOCD signature not found)"); } // Parse the offset to the central directory start off += CFD_LOCATOR_OFFSET; byte[] cdStart = new byte[4]; System.arraycopy(buf, off, cdStart, 0, 4); off += 4; // Fetch the global zip file comment byte[] commentLen = new byte[2]; System.arraycopy(buf, off, commentLen, 0, 2); off += 2; // Fetch the global zip file comment byte commentBytes[] = new byte[ZipShort.getValue(commentLen)]; System.arraycopy(buf, off, commentBytes, 0, commentBytes.length); // If no default encoding has been specified, try to guess the comment's encoding. 
// Note that the Zip format doesn't provide any way of knowing the encoding, not even a bit to indicate UTF-8 // like bit 11 in GPBF. comment = getString(commentBytes, defaultEncoding!=null?defaultEncoding:EncodingDetector.detectEncoding(commentBytes)); // Seek to the start of the central directory rais.seek(ZipLong.getValue(cdStart)); } finally { BufferPool.releaseByteArray(buf); } } /** * Creates and returns a String created using the given bytes and encoding. * If the specified encoding isn't supported, the platform's default encoding will be used. * * @param bytes the byte array to transform * @param encoding the encoding to use to instantiate the String * @return String instance that was created with the given encoding */ private static String getString(byte[] bytes, String encoding) { if(bytes.length==0) return ""; if(encoding!=null) { try { return new String(bytes, encoding); } catch(UnsupportedEncodingException e) { LOGGER.info("Error: unsupported encoding: {}, falling back to default encoding", encoding); } } // Fall back to platform's default encoding return new String(bytes); } /////////////////// // Inner classes // /////////////////// /** * InputStream that delegates requests to the underlying RandomAccessFile, making sure that only bytes from a * certain range can be read. 
*/ private static class BoundedInputStream extends InputStream { private final RandomAccessInputStream rais; private long remaining; private long loc; private boolean addDummyByte = false; BoundedInputStream(RandomAccessInputStream rais, long start, long remaining) { this.rais = rais; this.remaining = remaining; loc = start; } @Override public int read() throws IOException { if (remaining-- <= 0) { if (addDummyByte) { addDummyByte = false; return 0; } return -1; } synchronized (rais) { rais.seek(loc++); return rais.read(); } } @Override public int read(byte[] b, int off, int len) throws IOException { if (remaining <= 0) { if (addDummyByte) { addDummyByte = false; b[off] = 0; return 1; } return -1; } if (len <= 0) { return 0; } if (len > remaining) { len = (int) remaining; } int ret; synchronized (rais) { rais.seek(loc); ret = rais.read(b, off, len); } if (ret > 0) { loc += ret; remaining -= ret; } return ret; } @Override public void close() throws IOException { rais.close(); } /** * Inflater needs an extra dummy byte for nowrap - see Inflater's javadocs. */ void addDummy() { addDummyByte = true; } } }