/* * JBoss, Home of Professional Open Source. * Copyright 2011, Red Hat, Inc., and individual contributors * as indicated by the @author tags. See the copyright.txt file in the * distribution for a full listing of individual contributors. * * This is free software; you can redistribute it and/or modify it * under the terms of the GNU Lesser General Public License as * published by the Free Software Foundation; either version 2.1 of * the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this software; if not, write to the Free * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA * 02110-1301 USA, or see the FSF site: http://www.fsf.org. */ package org.jboss.as.server.deployment.scanner; import java.io.Closeable; import java.io.EOFException; import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.nio.ByteBuffer; import java.nio.ByteOrder; import java.nio.channels.FileChannel; import org.jboss.as.server.deployment.scanner.logging.DeploymentScannerLogger; /** * Scans a zip file to check whether the complete end of directory record is * present, indicating the file content (or at least the zip portion) is complete. * * @author Brian Stansberry (c) 2011 Red Hat Inc. */ public class ZipCompletionScanner { /** Local file header marker */ public static final long LOCSIG = 0x04034b50L; /** Extra data descriptor marker */ public static final long EXTSIG = 0x08074b50L; /** Central directory file header marker */ public static final long CENSIG = 0x02014b50L; /** End of central directory record marker */ public static final long ENDSIG = 0x06054b50L; /** Length of the fixed portion of a local file header */ public static final int LOCLEN = 30; /** Length of the fixed portion of a central directory file header */ public static final int CENLEN = 46; /** Length of the fixed portion of an End of central directory record */ public static final int ENDLEN = 22; /** Position of the filename length in a local file header */ public static final int LOC_FILENAMELEN = 26; /** Position of the extra field length in a local file header */ public static final int LOC_EXTFLDLEN = 28; /** Position of the associated local file's compressed size in the central directory file header */ public static final int CENSIZ = 20; /** Position of the associated local file's offset in the central directory file header */ public static final int CEN_LOC_OFFSET = 32; /** Position of the 'start of central directory' field in an end of central directory record */ public static final int END_CENSTART = 16; /** END_CENSTART value that indicates the zip is in ZIP 64 format */ public static final long ZIP64_MARKER = 0xFFFFFFFFL; /** Position of the comment length in an end of central directory record */ public static final int END_COMMENTLEN = 20; private static final int MAX_REVERSE_SCAN = (1 << 16) + ENDLEN; private static final int CHUNK_SIZE = 4096; private static final int ALPHABET_SIZE = 256; private static final byte[] ENDSIG_PATTERN = new byte[]{0x06, 0x05, 0x4b, 0x50}; private static final int SIG_PATTERN_LENGTH = 4; private static final int[] END_BAD_BYTE_SKIP = new int[ALPHABET_SIZE]; private static final byte[] LOCSIG_PATTERN = new byte[]{0x50, 0x4b, 0x03, 0x04}; private static final int[] LOC_BAD_BYTE_SKIP = new int[ALPHABET_SIZE]; static { // Set up the Boyer Moore "bad character arrays" for our 2 patterns computeBadByteSkipArray(ENDSIG_PATTERN, END_BAD_BYTE_SKIP); computeBadByteSkipArray(LOCSIG_PATTERN, LOC_BAD_BYTE_SKIP); } /** Prevent instantiation */ private ZipCompletionScanner() {} /** * Scans the given file looking for a complete zip file format end of central directory record. * * @param file the file * * @return true if a complete end of central directory record could be found * * @throws IOException */ public static boolean isCompleteZip(File file) throws IOException, NonScannableZipException { FileChannel channel = null; try { channel = new FileInputStream(file).getChannel(); long size = channel.size(); if (size < ENDLEN) { // Obvious case return false; } else if (validateEndRecord(file, channel, size - ENDLEN)) { // typical case where file is complete and end record has no comment return true; } // Either file is incomplete or the end of central directory record includes an arbitrary length comment // So, we have to scan backwards looking for an end of central directory record return scanForEndSig(file, channel); } finally { safeClose(channel); } } /** * Validates that the data structure at position startEndRecord has a field in the expected position * that points to the start of the first central directory file, and, if so, that the file * has a complete end of central directory record comment at the end. * * @param file the file being checked * @param channel the channel * @param startEndRecord the start of the end of central directory record * * @return true if it can be confirmed that the end of directory record points to a central directory * file and a complete comment is present, false otherwise * * @throws IOException * @throws NonScannableZipException */ private static boolean validateEndRecord(File file, FileChannel channel, long startEndRecord) throws IOException, NonScannableZipException { try { channel.position(startEndRecord); ByteBuffer endDirHeader = getByteBuffer(ENDLEN); read(endDirHeader, channel); if (endDirHeader.limit() < ENDLEN) { // Couldn't read the full end of central directory record header return false; } else if (getUnsignedInt(endDirHeader, 0) != ENDSIG) { return false; } long pos = getUnsignedInt(endDirHeader, END_CENSTART); // TODO deal with Zip64 if (pos == ZIP64_MARKER) { throw new NonScannableZipException(file, true); } ByteBuffer cdfhBuffer = getByteBuffer(CENLEN); read(cdfhBuffer, channel, pos); long header = getUnsignedInt(cdfhBuffer, 0); if (header == CENSIG) { long firstLoc = getUnsignedInt(cdfhBuffer, CEN_LOC_OFFSET); long firstSize = getUnsignedInt(cdfhBuffer, CENSIZ); if (firstLoc == 0) { // normal case -- first bytes are the first local file if (!validateLocalFileRecord(channel, 0, firstSize)) { return false; } } else { // confirm that firstLoc is indeed the first local file long fileFirstLoc = scanForLocSig(channel); if (firstLoc != fileFirstLoc) { if (fileFirstLoc == 0) { return false; } else { // scanForLocSig() found a LOCSIG, but not at position zero and not // at the expected position. // With a file like this, we can't tell if we're in a nested zip // or we're in an outer zip and had the bad luck to find random bytes // that look like LOCSIG. This file cannot be autodeployed. throw new NonScannableZipException(file, false); } } } // At this point, endDirHeader points to the correct end of central dir record. // Just need to validate the record is complete, including any comment int commentLen = getUnsignedShort(endDirHeader, END_COMMENTLEN); long commentEnd = startEndRecord + ENDLEN + commentLen; return commentEnd <= channel.size(); } return false; } catch (EOFException eof) { // pos or firstLoc weren't really positions and moved us to an invalid location return false; } } /** * Boyer Moore scan that proceeds backwards from the end of the file looking for ENDSIG * @throws NonScannableZipException */ private static boolean scanForEndSig(File file, FileChannel channel) throws IOException, NonScannableZipException { // TODO Consider just reading in MAX_REVERSE_SCAN bytes -- increased peak memory cost but less complex ByteBuffer bb = getByteBuffer(CHUNK_SIZE); long start = channel.size(); long end = Math.max(0, start - MAX_REVERSE_SCAN); long channelPos = Math.max(0, start - CHUNK_SIZE); long lastChannelPos = channelPos; boolean firstRead = true; while (lastChannelPos >= end) { read(bb, channel, channelPos); int actualRead = bb.limit(); if (firstRead) { long expectedRead = Math.min(CHUNK_SIZE, start); if (actualRead > expectedRead) { // File is still growing return false; } firstRead = false; } int bufferPos = actualRead -1; while (bufferPos >= SIG_PATTERN_LENGTH) { // Following is based on the Boyer Moore algorithm but simplified to reflect // a) the pattern is static // b) the pattern has no repeating bytes int patternPos; for (patternPos = SIG_PATTERN_LENGTH - 1; patternPos >= 0 && ENDSIG_PATTERN[patternPos] == bb.get(bufferPos - patternPos); --patternPos) { // empty loop while bytes match } // Switch gives same results as checking the "good suffix array" in the Boyer Moore algorithm switch (patternPos) { case -1: { // Pattern matched. Confirm is this is the start of a valid end of central dir record long startEndRecord = channelPos + bufferPos - SIG_PATTERN_LENGTH + 1; if (validateEndRecord(file, channel, startEndRecord)) { return true; } // wasn't a valid end record; continue scan bufferPos -= 4; break; } case 3: { // No bytes matched; the common case. // With our pattern, this is the only case where the Boyer Moore algorithm's "bad char array" may // produce a shift greater than the "good suffix array" (which would shift 1 byte) int idx = bb.get(bufferPos - patternPos) - Byte.MIN_VALUE; bufferPos -= END_BAD_BYTE_SKIP[idx]; break; } default: // 1 or more bytes matched bufferPos -= 4; } } // Move back a full chunk. If we didn't read a full chunk, that's ok, // it means we read all data and the outer while loop will terminate if (channelPos <= bufferPos) { break; } lastChannelPos = channelPos; channelPos -= Math.min(channelPos - bufferPos, CHUNK_SIZE - bufferPos); } return false; } /** Boyer Moore scan that proceeds forwards from the end of the file looking for the first LOCSIG */ private static long scanForLocSig(FileChannel channel) throws IOException { channel.position(0); ByteBuffer bb = getByteBuffer(CHUNK_SIZE); long end = channel.size(); while (channel.position() <= end) { read(bb, channel); int bufferPos = 0; while (bufferPos <= bb.limit() - SIG_PATTERN_LENGTH) { // Following is based on the Boyer Moore algorithm but simplified to reflect // a) the size of the pattern is static // b) the pattern is static and has no repeating bytes int patternPos; for (patternPos = SIG_PATTERN_LENGTH - 1; patternPos >= 0 && LOCSIG_PATTERN[patternPos] == bb.get(bufferPos + patternPos); --patternPos) { // empty loop while bytes match } // Outer switch gives same results as checking the "good suffix array" in the Boyer Moore algorithm switch (patternPos) { case -1: { // Pattern matched. Confirm is this is the start of a valid local file record long startLocRecord = channel.position() - bb.limit() + bufferPos; long currentPos = channel.position(); if (validateLocalFileRecord(channel, startLocRecord, -1)) { return startLocRecord; } // Restore position in case it shifted channel.position(currentPos); // wasn't a valid local file record; continue scan bufferPos += 4; break; } case 3: { // No bytes matched; the common case. // With our pattern, this is the only case where the Boyer Moore algorithm's "bad char array" may // produce a shift greater than the "good suffix array" (which would shift 1 byte) int idx = bb.get(bufferPos + patternPos) - Byte.MIN_VALUE; bufferPos += LOC_BAD_BYTE_SKIP[idx]; break; } default: // 1 or more bytes matched bufferPos += 4; } } } return -1; } /** * Checks that the data starting at startLocRecord looks like a local file record header. * * @param channel the channel * @param startLocRecord offset into channel of the start of the local record * @param compressedSize expected compressed size of the file, or -1 to indicate this isn't known */ private static boolean validateLocalFileRecord(FileChannel channel, long startLocRecord, long compressedSize) throws IOException { ByteBuffer lfhBuffer = getByteBuffer(LOCLEN); read(lfhBuffer, channel, startLocRecord); if (lfhBuffer.limit() < LOCLEN || getUnsignedInt(lfhBuffer, 0) != LOCSIG) { return false; } if (compressedSize == -1) { // We can't further evaluate return true; } int fnLen = getUnsignedShort(lfhBuffer, LOC_FILENAMELEN); int extFieldLen = getUnsignedShort(lfhBuffer, LOC_EXTFLDLEN); long nextSigPos = startLocRecord + LOCLEN + compressedSize + fnLen + extFieldLen; read(lfhBuffer, channel, nextSigPos); long header = getUnsignedInt(lfhBuffer, 0); return header == LOCSIG || header == EXTSIG || header == CENSIG; } private static ByteBuffer getByteBuffer(int capacity) { ByteBuffer b = ByteBuffer.allocate(capacity); b.order(ByteOrder.LITTLE_ENDIAN); return b; } private static void read(ByteBuffer bb, FileChannel ch) throws IOException { bb.clear(); ch.read(bb); bb.flip(); } private static void read(ByteBuffer bb, FileChannel ch, long pos) throws IOException { bb.clear(); ch.read(bb, pos); bb.flip(); } private static long getUnsignedInt(ByteBuffer bb, int offset) { return (bb.getInt(offset) & 0xffffffffL); } private static int getUnsignedShort(ByteBuffer bb, int offset) { return (bb.getShort(offset) & 0xffff); } /** Fills the Boyer Moore "bad character array" for the given pattern */ private static void computeBadByteSkipArray(byte[] pattern, int[] badByteArray) { for (int a = 0; a < ALPHABET_SIZE; a++) { badByteArray[a] = pattern.length; } for (int j = 0; j < pattern.length - 1; j++) { badByteArray[pattern[j] - Byte.MIN_VALUE] = pattern.length - j - 1; } } private static void safeClose(Closeable closeable) { if (closeable != null) { try { closeable.close(); } catch (Exception ignored) { } } } public static class NonScannableZipException extends Exception { private static final long serialVersionUID = -5794753842070509152L; private NonScannableZipException(final File file, final boolean zip64) { super(zip64 ? getZip64Message(file) : getNonStandardMessage(file)); } private static String getNonStandardMessage(final File file) { return DeploymentScannerLogger.ROOT_LOGGER.invalidZipFileFormat(file.getAbsolutePath()); } private static String getZip64Message(final File file) { return DeploymentScannerLogger.ROOT_LOGGER.invalidZip64FileFormat(file.getAbsolutePath()); } } }