ZipReader.java example

Explorer
test-master
- bazel-master
// Copyright 2015 The Bazel Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package com.google.devtools.build.zip;

import static java.nio.charset.StandardCharsets.UTF_8;

import java.io.BufferedInputStream;
import java.io.Closeable;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.RandomAccessFile;
import java.nio.channels.Channels;
import java.nio.charset.Charset;
import java.util.Collection;
import java.util.zip.ZipEntry;
import java.util.zip.ZipException;
import java.util.zip.ZipFile;

/**
 * A ZIP file reader.
 *
 * <p>This class provides entry data in the form of {@link ZipFileEntry}, which provides more detail
 * about the entry than the JDK equivalent {@link ZipEntry}. In addition to providing
 * {@link InputStream}s for entries, similar to JDK {@link ZipFile#getInputStream(ZipEntry)}, it
 * also provides access to the raw byte entry data via {@link #getRawInputStream(ZipFileEntry)}.
 *
 * <p>Using the raw access capabilities allows for more efficient ZIP file processing, such as
 * merging, by not requiring each entry's data to be decompressed when read.
 *
 * <p><em>NOTE:</em> The entries are read from the central directory. If the entry is not listed
 * there, it will not be returned from {@link #entries()} or {@link #getEntry(String)}.
 */
public class ZipReader implements Closeable, AutoCloseable {

  /** Largest archive comment that the two-byte comment length field can describe. */
  private static final int MAX_COMMENT_LENGTH = 0xFFFF;

  private final File file;
  private final RandomAccessFile in;
  private final ZipFileData zipData;

  /**
   * Opens a zip file for raw access.
   *
   * <p>The UTF-8 charset is used to decode the entry names and comments.
   *
   * @param file the zip file
   * @throws ZipException if a ZIP format error has occurred
   * @throws IOException if an I/O error has occurred
   */
  public ZipReader(File file) throws IOException {
    this(file, UTF_8);
  }

  /**
   * Opens a zip file for raw access.
   *
   * @param file the zip file
   * @param charset the charset to use to decode the entry names and comments
   * @throws ZipException if a ZIP format error has occurred
   * @throws IOException if an I/O error has occurred
   */
  public ZipReader(File file, Charset charset) throws IOException {
    this(file, charset, false);
  }

  /**
   * Opens a zip file for raw access.
   *
   * <p>If the central directory cannot be read, the underlying file is closed again before the
   * failure is propagated, so a failed construction does not leak a file handle.
   *
   * @param file the zip file
   * @param charset the charset to use to decode the entry names and comments
   * @param strictEntries force parsing to use the number of entries recorded in the end of
   *     central directory as the correct value, not as an estimate
   * @throws NullPointerException if {@code file} or {@code charset} is null
   * @throws ZipException if a ZIP format error has occurred
   * @throws IOException if an I/O error has occurred
   */
  public ZipReader(File file, Charset charset, boolean strictEntries) throws IOException {
    if (file == null || charset == null) {
      throw new NullPointerException();
    }
    this.file = file;
    // Build the in-memory state before opening the file so that nothing can fail between
    // acquiring the file handle and the guarded parse below.
    this.zipData = new ZipFileData(charset);
    this.in = new RandomAccessFile(file, "r");
    try {
      readCentralDirectory(strictEntries);
    } catch (IOException | RuntimeException e) {
      // The caller never receives this instance, so it cannot call close(); release the file here.
      try {
        in.close();
      } catch (IOException closeFailure) {
        e.addSuppressed(closeFailure);
      }
      throw e;
    }
  }

  /**
   * Returns the zip file's name.
   */
  public String getFilename() {
    return file.getName();
  }

  /**
   * Returns the ZIP file comment.
   */
  public String getComment() {
    return zipData.getComment();
  }

  /**
   * Returns a collection of the ZIP file entries.
   */
  public Collection<ZipFileEntry> entries() {
    return zipData.getEntries();
  }

  /**
   * Returns the ZIP file entry for the specified name, or null if not found.
   */
  public ZipFileEntry getEntry(String name) {
    return zipData.getEntry(name);
  }

  /**
   * Returns the number of entries in the ZIP file.
   */
  public long size() {
    return zipData.getNumEntries();
  }

  /**
   * Returns an input stream for reading the contents of the specified ZIP file entry.
   *
   * <p>Closing this ZIP file will, in turn, close all input streams that have been returned by
   * invocations of this method.
   *
   * @param entry the ZIP file entry
   * @return the input stream for reading the contents of the specified zip file entry
   * @throws ZipException if a ZIP format error has occurred, or if this ZIP file does not contain
   *     the given entry
   * @throws IOException if an I/O error has occurred
   */
  public InputStream getInputStream(ZipFileEntry entry) throws IOException {
    checkContainsEntry(entry);
    return new ZipEntryInputStream(this, entry, /* raw */ false);
  }

  /**
   * Returns an input stream for reading the raw contents of the specified ZIP file entry.
   *
   * <p><em>NOTE:</em> No inflating will take place; The data read from the input stream will be
   * the exact byte content of the ZIP file entry on disk.
   *
   * <p>Closing this ZIP file will, in turn, close all input streams that have been returned by
   * invocations of this method.
   *
   * @param entry the ZIP file entry
   * @return the input stream for reading the contents of the specified zip file entry
   * @throws ZipException if a ZIP format error has occurred, or if this ZIP file does not contain
   *     the given entry
   * @throws IOException if an I/O error has occurred
   */
  public InputStream getRawInputStream(ZipFileEntry entry) throws IOException {
    checkContainsEntry(entry);
    return new ZipEntryInputStream(this, entry, /* raw */ true);
  }

  /**
   * Verifies that the given entry is the one this ZIP file holds under the entry's name.
   *
   * @param entry the ZIP file entry to look up
   * @throws ZipException if no entry with that name exists or the stored entry is not equal to
   *     {@code entry}
   */
  private void checkContainsEntry(ZipFileEntry entry) throws ZipException {
    ZipFileEntry known = zipData.getEntry(entry.getName());
    // The lookup yields null for an unknown name; report that as a missing entry rather than
    // failing with a NullPointerException.
    if (known == null || !known.equals(entry)) {
      throw new ZipException(String.format(
          "Zip file '%s' does not contain the requested entry '%s'.", file.getName(),
          entry.getName()));
    }
  }

  /**
   * Closes the ZIP file.
   *
   * <p>Closing this ZIP file will close all of the input streams previously returned by invocations
   * of the {@link #getInputStream(ZipFileEntry)} and {@link #getRawInputStream(ZipFileEntry)}
   * methods.
   */
  @Override public void close() throws IOException {
    in.close();
  }

  /**
   * Finds, reads and parses ZIP file entries from the central directory.
   *
   * @param strictEntries force parsing to use the number of entries recorded in the end of
   *     central directory as the correct value, not as an estimate
   * @throws ZipException if a ZIP format error has occurred
   * @throws IOException if an I/O error has occurred
   */
  private void readCentralDirectory(boolean strictEntries) throws IOException {
    long eocdLocation = findEndOfCentralDirectoryRecord();
    InputStream stream = getStreamAt(eocdLocation);
    EndOfCentralDirectoryRecord.read(stream, zipData);

    if (zipData.isMaybeZip64()) {
      try {
        // The Zip64 locator, if present, sits directly in front of the end of central directory
        // record and points at the Zip64 end of central directory.
        stream = getStreamAt(eocdLocation - Zip64EndOfCentralDirectoryLocator.FIXED_DATA_SIZE);
        Zip64EndOfCentralDirectoryLocator.read(stream, zipData);

        stream = getStreamAt(zipData.getZip64EndOfCentralDirectoryOffset());
        Zip64EndOfCentralDirectory.read(stream, zipData);
      } catch (ZipException ignored) {
        // expected if not in Zip64 format
      }
    }

    if (zipData.isZip64() || strictEntries) {
      // If in Zip64 format or using strict entry numbers, use the parsed information as is to read
      // the central directory file headers.
      readCentralDirectoryFileHeaders(zipData.getExpectedEntries(),
          zipData.getCentralDirectoryOffset());
    } else {
      // If not in Zip64 format, compute central directory offset by end of central directory record
      // offset and central directory size to allow reading large non-compliant Zip32 directories.
      long centralDirectoryOffset = eocdLocation - zipData.getCentralDirectorySize();
      // If the lower 4 bytes match, the above calculation is correct; otherwise fallback to
      // reported offset.
      if ((int) centralDirectoryOffset == (int) zipData.getCentralDirectoryOffset()) {
        readCentralDirectoryFileHeaders(centralDirectoryOffset);
      } else {
        readCentralDirectoryFileHeaders(zipData.getExpectedEntries(),
            zipData.getCentralDirectoryOffset());
      }
    }
  }

  /**
   * Looks for the target sub array in the buffer scanning backwards starting at offset. Returns the
   * index where the target is found or -1 if not found.
   *
   * @param target the sub array to find
   * @param buffer the array to scan
   * @param offset the index of where to begin scanning
   * @return the index of target within buffer or -1 if not found
   */
  private int scanBackwards(byte[] target, byte[] buffer, int offset) {
    // Never start so late that the target would run past the end of the buffer.
    int start = Math.min(offset, buffer.length - target.length);
    for (int i = start; i >= 0; i--) {
      for (int j = 0; j < target.length; j++) {
        if (buffer[i + j] != target[j]) {
          break;
        } else if (j == target.length - 1) {
          return i;
        }
      }
    }
    return -1;
  }

  /**
   * Finds the file offset of the end of central directory record.
   *
   * <p>The record consists of a fixed-size part followed by a comment whose length is stored in a
   * two-byte field, and it is the last structure in the file. It therefore always starts within
   * the final {@code FIXED_DATA_SIZE + 65535} bytes, so only that tail is read and searched. A
   * signature match is accepted only when its comment length field accounts for exactly the bytes
   * remaining up to the end of the file; the match closest to the end of the file wins.
   *
   * @return the file offset of the end of central directory record
   * @throws ZipException if a ZIP format error has occurred
   * @throws IOException if an I/O error has occurred
   */
  private long findEndOfCentralDirectoryRecord() throws IOException {
    long fileLength = in.length();
    if (fileLength < EndOfCentralDirectoryRecord.FIXED_DATA_SIZE) {
      throw new ZipException(String.format("Zip file '%s' is malformed. It does not contain an end"
          + " of central directory record.", file.getName()));
    }

    byte[] signature = ZipUtil.intToLittleEndian(EndOfCentralDirectoryRecord.SIGNATURE);
    long maxRecordSize = EndOfCentralDirectoryRecord.FIXED_DATA_SIZE + (long) MAX_COMMENT_LENGTH;
    byte[] tail = new byte[(int) Math.min(maxRecordSize, fileLength)];
    long tailOffset = fileLength - tail.length;
    in.seek(tailOffset);
    in.readFully(tail);

    int signatureLocation = scanBackwards(signature, tail, tail.length);
    while (signatureLocation != -1) {
      // Number of bytes from the candidate record start to the end of the file.
      long eocdSize = tail.length - signatureLocation;
      if (eocdSize >= EndOfCentralDirectoryRecord.FIXED_DATA_SIZE) {
        // The whole fixed-size part lies inside the tail here, so the length field is in bounds.
        int commentLength = ZipUtil.getUnsignedShort(tail, signatureLocation
            + EndOfCentralDirectoryRecord.COMMENT_LENGTH_OFFSET);
        long readCommentLength = eocdSize - EndOfCentralDirectoryRecord.FIXED_DATA_SIZE;
        if (commentLength == readCommentLength) {
          return tailOffset + signatureLocation;
        }
      }
      // The signature bytes were part of some other data (e.g. the comment); keep looking.
      signatureLocation = scanBackwards(signature, tail, signatureLocation - 1);
    }
    throw new ZipException(String.format("Zip file '%s' is malformed. It does not contain an end"
        + " of central directory record.", file.getName()));
  }

  /**
   * Reads and parses ZIP file entries from the central directory.
   *
   * @param count the number of entries in the central directory
   * @param fileOffset the file offset of the start of the central directory
   * @throws ZipException if a ZIP format error has occurred
   * @throws IOException if an I/O error has occurred
   */
  private void readCentralDirectoryFileHeaders(long count, long fileOffset) throws IOException {
    InputStream centralDirectory = getStreamAt(fileOffset);
    for (long i = 0; i < count; i++) {
      ZipFileEntry entry = CentralDirectoryFileHeader.read(centralDirectory, zipData.getCharset());
      zipData.addEntry(entry);
    }
  }

  /**
   * Reads and parses ZIP file entries from the central directory, consuming headers until the
   * number of bytes read reaches the recorded central directory size.
   *
   * @param fileOffset the file offset of the start of the central directory
   * @throws ZipException if a ZIP format error has occurred
   * @throws IOException if an I/O error has occurred
   */
  private void readCentralDirectoryFileHeaders(long fileOffset) throws IOException {
    CountingInputStream centralDirectory = new CountingInputStream(getStreamAt(fileOffset));
    while (centralDirectory.getCount() < zipData.getCentralDirectorySize()) {
      ZipFileEntry entry = CentralDirectoryFileHeader.read(centralDirectory, zipData.getCharset());
      zipData.addEntry(entry);
    }
  }

  /**
   * Returns a new {@link InputStream} positioned at fileOffset.
   *
   * <p>The stream reads through the channel of the underlying file, whose position is moved to
   * {@code fileOffset} by this call.
   *
   * @param fileOffset the file offset at which the returned stream starts reading
   * @throws ZipException if {@code fileOffset} is negative, which can only result from offsets
   *     computed from a malformed ZIP file
   * @throws IOException if an I/O error has occurred
   */
  protected InputStream getStreamAt(long fileOffset) throws IOException {
    if (fileOffset < 0) {
      // Report a format error instead of letting the channel reject the position with an
      // IllegalArgumentException.
      throw new ZipException(String.format(
          "Zip file '%s' is malformed. It refers to the negative file offset %d.", file.getName(),
          fileOffset));
    }
    return new BufferedInputStream(Channels.newInputStream(in.getChannel().position(fileOffset)));
  }
}