// Copyright 2015 The Bazel Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package com.google.devtools.build.zip;

import static java.nio.charset.StandardCharsets.UTF_8;

import java.io.BufferedInputStream;
import java.io.Closeable;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.RandomAccessFile;
import java.nio.channels.Channels;
import java.nio.charset.Charset;
import java.util.Collection;
import java.util.zip.ZipEntry;
import java.util.zip.ZipException;
import java.util.zip.ZipFile;

/**
 * A ZIP file reader.
 *
 * <p>This class provides entry data in the form of {@link ZipFileEntry}, which provides more detail
 * about the entry than the JDK equivalent {@link ZipEntry}. In addition to providing
 * {@link InputStream}s for entries, similar to JDK {@link ZipFile#getInputStream(ZipEntry)}, it
 * also provides access to the raw byte entry data via {@link #getRawInputStream(ZipFileEntry)}.
 *
 * <p>Using the raw access capabilities allows for more efficient ZIP file processing, such as
 * merging, by not requiring each entry's data to be decompressed when read.
 *
 * <p><em>NOTE:</em> The entries are read from the central directory. If the entry is not listed
 * there, it will not be returned from {@link #entries()} or {@link #getEntry(String)}.
 */
public class ZipReader implements Closeable, AutoCloseable {

  /**
   * Largest value the 16-bit comment length field of the end of central directory record can
   * hold, and therefore the largest number of bytes that may follow the record's fixed data.
   */
  private static final int MAX_COMMENT_LENGTH = 0xFFFF;

  private final File file;
  private final RandomAccessFile in;
  private final ZipFileData zipData;

  /**
   * Opens a zip file for raw access.
   *
   * <p>The UTF-8 charset is used to decode the entry names and comments.
   *
   * @param file the zip file
   * @throws ZipException if a ZIP format error has occurred
   * @throws IOException if an I/O error has occurred
   */
  public ZipReader(File file) throws IOException {
    this(file, UTF_8);
  }

  /**
   * Opens a zip file for raw access.
   *
   * @param file the zip file
   * @param charset the charset to use to decode the entry names and comments
   * @throws ZipException if a ZIP format error has occurred
   * @throws IOException if an I/O error has occurred
   */
  public ZipReader(File file, Charset charset) throws IOException {
    this(file, charset, false);
  }

  /**
   * Opens a zip file for raw access.
   *
   * <p>If the central directory cannot be read, the underlying file is closed again before the
   * failure is propagated, so a failed construction does not leak a file handle.
   *
   * @param file the zip file
   * @param charset the charset to use to decode the entry names and comments
   * @param strictEntries force parsing to use the number of entries recorded in the end of
   *     central directory as the correct value, not as an estimate
   * @throws NullPointerException if {@code file} or {@code charset} is null
   * @throws ZipException if a ZIP format error has occurred
   * @throws IOException if an I/O error has occurred
   */
  public ZipReader(File file, Charset charset, boolean strictEntries) throws IOException {
    if (file == null || charset == null) {
      throw new NullPointerException();
    }
    this.file = file;
    // Build the in-memory model before opening the file so that nothing can fail between opening
    // the file and entering the guarded section below.
    this.zipData = new ZipFileData(charset);
    this.in = new RandomAccessFile(file, "r");
    try {
      readCentralDirectory(strictEntries);
    } catch (IOException | RuntimeException e) {
      // The caller never receives this instance, so nobody else is able to close the file.
      try {
        in.close();
      } catch (IOException closeFailure) {
        e.addSuppressed(closeFailure);
      }
      throw e;
    }
  }

  /**
   * Returns the zip file's name.
   */
  public String getFilename() {
    return file.getName();
  }

  /**
   * Returns the ZIP file comment.
   */
  public String getComment() {
    return zipData.getComment();
  }

  /**
   * Returns a collection of the ZIP file entries.
   */
  public Collection<ZipFileEntry> entries() {
    return zipData.getEntries();
  }

  /**
   * Returns the ZIP file entry for the specified name, or null if not found.
   */
  public ZipFileEntry getEntry(String name) {
    return zipData.getEntry(name);
  }

  /**
   * Returns the number of entries in the ZIP file.
   */
  public long size() {
    return zipData.getNumEntries();
  }

  /**
   * Returns an input stream for reading the contents of the specified ZIP file entry.
   *
   * <p>Closing this ZIP file will, in turn, close all input streams that have been returned by
   * invocations of this method.
   *
   * @param entry the ZIP file entry
   * @return the input stream for reading the contents of the specified zip file entry
   * @throws ZipException if this ZIP file does not contain the entry or a ZIP format error has
   *     occurred
   * @throws IOException if an I/O error has occurred
   */
  public InputStream getInputStream(ZipFileEntry entry) throws IOException {
    checkContainsEntry(entry);
    return new ZipEntryInputStream(this, entry, /* raw */ false);
  }

  /**
   * Returns an input stream for reading the raw contents of the specified ZIP file entry.
   *
   * <p><em>NOTE:</em> No inflating will take place; The data read from the input stream will be
   * the exact byte content of the ZIP file entry on disk.
   *
   * <p>Closing this ZIP file will, in turn, close all input streams that have been returned by
   * invocations of this method.
   *
   * @param entry the ZIP file entry
   * @return the input stream for reading the contents of the specified zip file entry
   * @throws ZipException if this ZIP file does not contain the entry or a ZIP format error has
   *     occurred
   * @throws IOException if an I/O error has occurred
   */
  public InputStream getRawInputStream(ZipFileEntry entry) throws IOException {
    checkContainsEntry(entry);
    return new ZipEntryInputStream(this, entry, /* raw */ true);
  }

  /**
   * Closes the ZIP file.
   *
   * <p>Closing this ZIP file will close all of the input streams previously returned by invocations
   * of the {@link #getInputStream(ZipFileEntry)} and {@link #getRawInputStream(ZipFileEntry)}
   * methods.
   */
  @Override
  public void close() throws IOException {
    in.close();
  }

  /**
   * Verifies that the given entry is the one this ZIP file has registered under the entry's name.
   *
   * @param entry the ZIP file entry to look up
   * @throws ZipException if no entry is registered under that name, or the registered entry is not
   *     equal to {@code entry}
   */
  private void checkContainsEntry(ZipFileEntry entry) throws ZipException {
    ZipFileEntry known = zipData.getEntry(entry.getName());
    // The lookup yields null for unknown names; report that as a missing entry instead of letting
    // it surface as a NullPointerException.
    if (known == null || !known.equals(entry)) {
      throw new ZipException(String.format(
          "Zip file '%s' does not contain the requested entry '%s'.", file.getName(),
          entry.getName()));
    }
  }

  /**
   * Finds, reads and parses ZIP file entries from the central directory.
   *
   * @param strictEntries force parsing to use the number of entries recorded in the end of
   *     central directory as the correct value, not as an estimate
   * @throws ZipException if a ZIP format error has occurred
   * @throws IOException if an I/O error has occurred
   */
  private void readCentralDirectory(boolean strictEntries) throws IOException {
    long eocdLocation = findEndOfCentralDirectoryRecord();
    InputStream stream = getStreamAt(eocdLocation);
    EndOfCentralDirectoryRecord.read(stream, zipData);

    if (zipData.isMaybeZip64()) {
      long locatorOffset = eocdLocation - Zip64EndOfCentralDirectoryLocator.FIXED_DATA_SIZE;
      // A locator cannot precede the start of the file; without this guard a tiny file would be
      // asked to seek to a negative position.
      if (locatorOffset >= 0) {
        try {
          stream = getStreamAt(locatorOffset);
          Zip64EndOfCentralDirectoryLocator.read(stream, zipData);
          stream = getStreamAt(zipData.getZip64EndOfCentralDirectoryOffset());
          Zip64EndOfCentralDirectory.read(stream, zipData);
        } catch (ZipException ignored) {
          // expected if not in Zip64 format
        }
      }
    }

    if (zipData.isZip64() || strictEntries) {
      // If in Zip64 format or using strict entry numbers, use the parsed information as is to read
      // the central directory file headers.
      readCentralDirectoryFileHeaders(zipData.getExpectedEntries(),
          zipData.getCentralDirectoryOffset());
    } else {
      // If not in Zip64 format, compute central directory offset by end of central directory record
      // offset and central directory size to allow reading large non-compliant Zip32 directories.
      long centralDirectoryOffset = eocdLocation - zipData.getCentralDirectorySize();
      // If the lower 4 bytes match, the above calculation is correct; otherwise fallback to
      // reported offset.
      if ((int) centralDirectoryOffset == (int) zipData.getCentralDirectoryOffset()) {
        readCentralDirectoryFileHeaders(centralDirectoryOffset);
      } else {
        readCentralDirectoryFileHeaders(zipData.getExpectedEntries(),
            zipData.getCentralDirectoryOffset());
      }
    }
  }

  /**
   * Looks for the target sub array in the buffer scanning backwards starting at offset. Returns the
   * index where the target is found or -1 if not found.
   *
   * @param target the sub array to find
   * @param buffer the array to scan
   * @param offset the index of where to begin scanning; values past the last index at which the
   *     target could fit are clamped, and a negative value yields -1
   * @return the index of target within buffer or -1 if not found
   */
  private int scanBackwards(byte[] target, byte[] buffer, int offset) {
    int start = Math.min(offset, buffer.length - target.length);
    for (int i = start; i >= 0; i--) {
      for (int j = 0; j < target.length; j++) {
        if (buffer[i + j] != target[j]) {
          break;
        } else if (j == target.length - 1) {
          return i;
        }
      }
    }
    return -1;
  }

  /**
   * Finds the file offset of the end of central directory record.
   *
   * <p>The record sits at the end of the file, followed only by the archive comment. Because the
   * comment length is stored in an unsigned 16-bit field, the record must start within the last
   * {@code FIXED_DATA_SIZE + 65535} bytes, so only that tail of the file is read and searched. The
   * tail is scanned backwards for the record signature; a match is accepted only when the comment
   * length it records equals the number of bytes that actually follow the fixed part of the
   * record, which rejects signature bytes that merely happen to occur inside a comment.
   *
   * @return the file offset of the end of central directory record
   * @throws ZipException if a ZIP format error has occurred
   * @throws IOException if an I/O error has occurred
   */
  private long findEndOfCentralDirectoryRecord() throws IOException {
    long fileLength = in.length();
    if (fileLength < EndOfCentralDirectoryRecord.FIXED_DATA_SIZE) {
      throw missingEndOfCentralDirectoryRecord();
    }

    long maxTailLength = (long) EndOfCentralDirectoryRecord.FIXED_DATA_SIZE + MAX_COMMENT_LENGTH;
    int tailLength = (int) Math.min(fileLength, maxTailLength);
    long tailOffset = fileLength - tailLength;
    byte[] tail = new byte[tailLength];
    in.seek(tailOffset);
    in.readFully(tail);

    byte[] signature = ZipUtil.intToLittleEndian(EndOfCentralDirectoryRecord.SIGNATURE);
    // A record starting any later than this would not have room for its fixed data, so every
    // candidate examined below has its comment length field inside the buffer.
    int lastCandidate = (int) (tailLength - EndOfCentralDirectoryRecord.FIXED_DATA_SIZE);
    int signatureLocation = scanBackwards(signature, tail, lastCandidate);
    while (signatureLocation != -1) {
      int commentLength = ZipUtil.getUnsignedShort(tail,
          signatureLocation + EndOfCentralDirectoryRecord.COMMENT_LENGTH_OFFSET);
      long bytesAfterFixedData =
          (long) tailLength - signatureLocation - EndOfCentralDirectoryRecord.FIXED_DATA_SIZE;
      if (commentLength == bytesAfterFixedData) {
        return tailOffset + signatureLocation;
      }
      signatureLocation = scanBackwards(signature, tail, signatureLocation - 1);
    }
    throw missingEndOfCentralDirectoryRecord();
  }

  /**
   * Creates the exception reported when no end of central directory record can be located.
   */
  private ZipException missingEndOfCentralDirectoryRecord() {
    return new ZipException(String.format("Zip file '%s' is malformed. It does not contain an end"
        + " of central directory record.", file.getName()));
  }

  /**
   * Reads and parses ZIP file entries from the central directory.
   *
   * @param count the number of entries in the central directory
   * @param fileOffset the file offset of the start of the central directory
   * @throws ZipException if a ZIP format error has occurred
   * @throws IOException if an I/O error has occurred
   */
  private void readCentralDirectoryFileHeaders(long count, long fileOffset) throws IOException {
    InputStream centralDirectory = getStreamAt(fileOffset);
    for (long i = 0; i < count; i++) {
      ZipFileEntry entry = CentralDirectoryFileHeader.read(centralDirectory, zipData.getCharset());
      zipData.addEntry(entry);
    }
  }

  /**
   * Reads and parses ZIP file entries from the central directory.
   *
   * <p>Instead of relying on a recorded entry count, entries are read until the number of bytes
   * consumed reaches the central directory size recorded in the ZIP data.
   *
   * @param fileOffset the file offset of the start of the central directory
   * @throws ZipException if a ZIP format error has occurred
   * @throws IOException if an I/O error has occurred
   */
  private void readCentralDirectoryFileHeaders(long fileOffset) throws IOException {
    CountingInputStream centralDirectory = new CountingInputStream(getStreamAt(fileOffset));
    while (centralDirectory.getCount() < zipData.getCentralDirectorySize()) {
      ZipFileEntry entry = CentralDirectoryFileHeader.read(centralDirectory, zipData.getCharset());
      zipData.addEntry(entry);
    }
  }

  /**
   * Returns a new {@link InputStream} positioned at fileOffset.
   *
   * <p>The stream is backed by the channel of the underlying file, which is repositioned by this
   * call.
   *
   * @throws IOException if an I/O error has occurred
   */
  protected InputStream getStreamAt(long fileOffset) throws IOException {
    return new BufferedInputStream(Channels.newInputStream(in.getChannel().position(fileOffset)));
  }
}