/**
* <copyright>
*
* Copyright (c) 2004, 2010 IBM Corporation and others.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* Contributors:
* IBM - Initial API and implementation
*
* </copyright>
*
* $Id$
*/
package net.enilink.komma.common.archive;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FilterInputStream;
import java.io.FilterOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;
import java.util.zip.ZipInputStream;
import java.util.zip.ZipOutputStream;
import net.enilink.komma.core.URIs;
/**
* A connection that can access an entry in an archive, and then recursively an
* entry in that archive, and so on. For example, it can be used just like jar:
* or zip:, only the archive paths can repeat, e.g.,
*
* <pre>
* archive:file:///c:/temp/example.zip!/org/example/nested.zip!/org/example/deeply-nested.html
* </pre>
*
* The general recursive pattern is
*
* <pre>
* archive:$nestedURL${/!$archivePath$}+
* </pre>
*
* So the nested URL for the example above is
*
* <pre>
* file:///c:/temp/example.zip
* </pre>
*
* <p>
* Since the nested URL may itself contain archive schemes, the subsequence of
* the archive paths that should be associated with the nested URL is determined
* by finding the nth archive separator, i.e., the nth !/, where n is the number
* of ":"s before the first "/" of the nested URL, i.e., the number of nested
* schemes. For example, for a more complex case where the nested URL is itself
* an archive-based scheme, e.g.,
*
* <pre>
* archive:jar:file:///c:/temp/example.zip!/org/example/nested.zip!/org/example/deeply-nested.html
* </pre>
*
* the nested URL is correctly parsed to skip to the second archive separator as
*
* <pre>
* jar:file:///c:/temp/example.zip!/org/example/nested.zip
* </pre>
*
* </p>
*
* <p>
* The logic for accessing archives can be tailored and reused independently of
* its usage as a URL connection. This is normally done by using the constructor
* {@link #ArchiveURLConnection(String)} and overriding
* {@link #createInputStream(String)} and {@link #createOutputStream(String)}.
* The behavior can be tailored by overriding {@link #emulateArchiveScheme()}
* and {@link #useZipFile()}.
* </p>
*/
public class ArchiveURLConnection extends URLConnection {
/**
* The cached string version of the {@link #url URL}. For instances created
* with {@link #ArchiveURLConnection(String)} this is the only record of the
* address, because that constructor passes a null URL to the super class.
*/
protected String urlString;
/**
 * Creates a connection bound to the given archive URL and caches the URL's
 * string form for later parsing.
 *
 * @param url
 *            the URL this connection refers to; must not be null because
 *            its string form is taken immediately.
 */
public ArchiveURLConnection(URL url) {
    super(url);
    this.urlString = url.toString();
}
/**
 * Creates an archive accessor from the textual form of an archive URL.
 * A null URL is forwarded to the super constructor, so an instance built
 * this way <b>cannot</b> be used as a URLConnection. It exists so that the
 * logic for accessing archives and for delegating to the nested URL can be
 * reused in other applications without creating any URL objects.
 *
 * @param url
 *            the string form of the archive URL; stored as given.
 */
protected ArchiveURLConnection(String url) {
    super(null);
    this.urlString = url;
}
/**
* <p>
* Returns whether the implementation will handle all the archive
* accessors directly. For example, whether
*
* <pre>
* archive:jar:file:///c:/temp/example.zip!/org/example/nested.zip!/org/example/deeply-nested.html
* </pre>
*
* will be handled as if it were specified as
*
* <pre>
* archive:file:///c:/temp/example.zip!/org/example/nested.zip!/org/example/deeply-nested.html
* </pre>
*
* Override this only if you are reusing the logic of retrieving an input
* stream into an archive and hence are likely to be overriding
* createInputStream, which is the point of delegation to the nested URL for
* recursive stream creation.
* </p>
*
* <p>
* This default implementation always returns <code>false</code>.
* </p>
*
* @return whether the implementation will handle all the archive accessors
* directly.
*/
protected boolean emulateArchiveScheme() {
return false;
}
/**
* Returns whether to handle the special case of a nested URL with the file:
* scheme using a {@link ZipFile}. This gives more efficient direct access
* to the root entry, e.g.,
*
* <pre>
* archive:file:///c:/temp/example.zip!/org/example/nested.html
* </pre>
*
* This default implementation always returns <code>false</code>; override
* it to return <code>true</code> to enable the ZipFile-based access.
*
* @return whether to handle the special case of a nested URL with the file:
* scheme using a ZipFile.
*/
protected boolean useZipFile() {
return false;
}
/**
* Records that this is connected by setting the inherited
* <code>connected</code> flag. Nothing is opened here; the method itself
* performs no I/O.
*/
@Override
public void connect() throws IOException {
connected = true;
}
/**
 * Extracts the nested URL from the cached {@link #urlString}, i.e., the
 * part after the leading archive scheme up to the archive separator
 * ("!/") that belongs to this connection.
 *
 * <p>
 * For every additional scheme found before the first "/" the result is
 * adjusted: when {@link #emulateArchiveScheme()} returns true the scheme
 * is skipped (the nested URL starts after it), otherwise one more archive
 * separator is left to the nested URL so that the nested scheme handles it.
 * </p>
 *
 * @return the nested URL as a string.
 * @throws MalformedURLException
 *             if the URL contains no archive separator, has fewer archive
 *             separators than nested schemes, or has no scheme for the
 *             nested URL at all.
 * @throws IllegalArgumentException
 *             if the archive scheme is followed directly by "/" or by
 *             nothing instead of another scheme.
 * @throws IOException
 *             declared for overriding implementations.
 */
protected String getNestedURL() throws IOException {
    // There must be at least one archive path.
    //
    int archiveSeparator = urlString.indexOf("!/");
    if (archiveSeparator < 0) {
        throw new MalformedURLException("missing archive separators "
                + urlString);
    }

    // There needs to be another URL protocol right after the archive
    // protocol, and not a "/". The bounds test must be ">=" because start
    // can be at most the length of the string; with ">" a URL ending in
    // its first ':' would fail in charAt instead of being rejected here.
    //
    int start = urlString.indexOf(':') + 1;
    if (start >= urlString.length() || urlString.charAt(start) == '/') {
        throw new IllegalArgumentException(
                "archive protocol must be immediately followed by another URL protocol "
                        + urlString);
    }

    // Parse to extract the archives that will be delegated to the nested
    // URL based on the number of schemes at the start.
    //
    for (int i = start, end = urlString.indexOf("/") - 1; (i = urlString
            .indexOf(":", i)) < end;) {
        if (i < 0) {
            // No further ':' exists, so the nested URL has no scheme.
            // Without this check the search would restart from the
            // beginning of the string, which never terminates when
            // schemes are emulated.
            //
            throw new MalformedURLException("missing nested URL protocol "
                    + urlString);
        }
        if (emulateArchiveScheme()) {
            // Skip a scheme for the archive accessor to be handled directly
            // here.
            //
            start = ++i;
        } else {
            // Skip an archive accessor to be handled by delegation to the
            // scheme in nested URL.
            //
            archiveSeparator = urlString
                    .indexOf("!/", archiveSeparator + 2);
            if (archiveSeparator < 0) {
                throw new MalformedURLException(
                        "too few archive separators " + urlString);
            }
            ++i;
        }
    }
    return urlString.substring(start, archiveSeparator);
}
/**
* Creates the input stream for the URL.
*
* @return the input stream for the URL.
*/
@Override
public InputStream getInputStream() throws IOException {
// Create the delegate URL.
//
String nestedURL = getNestedURL();
// The cutoff point to the next archive.
//
int archiveSeparator = urlString.indexOf(nestedURL)
+ nestedURL.length();
int nextArchiveSeparator = urlString
.indexOf("!/", archiveSeparator + 2);
// Construct the input stream in a special efficient way for case of a
// file scheme.
//
InputStream inputStream;
ZipEntry inputZipEntry = null;
if (!useZipFile() || !nestedURL.startsWith("file:")) {
// Just get the stream from the URL.
//
inputStream = createInputStream(nestedURL);
} else {
// The name to be used for the entry.
//
String entry = URIs.decode(nextArchiveSeparator < 0 ? urlString
.substring(archiveSeparator + 2) : urlString.substring(
archiveSeparator + 2, nextArchiveSeparator));
// Skip over this archive path to the next one, since we are
// handling this one special.
//
archiveSeparator = nextArchiveSeparator;
nextArchiveSeparator = urlString
.indexOf("!/", archiveSeparator + 2);
// Go directly to the right entry in the zip file,
// get the stream,
// and wrap it so that closing it closes the zip file.
//
final ZipFile zipFile = new ZipFile(URIs.decode(nestedURL
.substring(5)));
inputZipEntry = zipFile.getEntry(entry);
InputStream zipEntryInputStream = inputZipEntry == null ? null
: zipFile.getInputStream(inputZipEntry);
if (zipEntryInputStream == null) {
throw new IOException("Archive entry not found " + urlString);
}
inputStream = new FilterInputStream(zipEntryInputStream) {
@Override
public void close() throws IOException {
super.close();
zipFile.close();
}
};
}
// Loop over the archive paths.
//
LOOP: while (archiveSeparator > 0) {
inputZipEntry = null;
// The entry name to be matched.
//
String entry = URIs.decode(nextArchiveSeparator < 0 ? urlString
.substring(archiveSeparator + 2) : urlString.substring(
archiveSeparator + 2, nextArchiveSeparator));
// Wrap the input stream as a zip stream to scan it's contents for a
// match.
//
ZipInputStream zipInputStream = new ZipInputStream(inputStream);
while (zipInputStream.available() >= 0) {
ZipEntry zipEntry = zipInputStream.getNextEntry();
if (zipEntry == null) {
break;
} else if (entry.equals(zipEntry.getName())) {
inputZipEntry = zipEntry;
inputStream = zipInputStream;
// Skip to the next archive path and continue the loop.
//
archiveSeparator = nextArchiveSeparator;
nextArchiveSeparator = urlString.indexOf("!/",
archiveSeparator + 2);
continue LOOP;
}
}
zipInputStream.close();
throw new IOException("Archive entry not found " + urlString);
}
return yield(inputZipEntry, inputStream);
}
/**
* Hook through which {@link #getInputStream()} passes its result before
* returning it. This default implementation returns the given stream
* unchanged; subclasses may override it to wrap or replace the stream.
*
* @param zipEntry
* the archive entry that was matched last while resolving the URL.
* @param inputStream
* the stream that has been opened for the URL.
* @return the stream to hand to the caller; here the same
* <code>inputStream</code>.
* @throws IOException
* never thrown by this implementation; declared for overriding
* implementations.
*/
protected InputStream yield(ZipEntry zipEntry, InputStream inputStream)
throws IOException {
return inputStream;
}
/**
 * Opens a stream for reading the nested URL. The string is turned into a
 * {@link URL} and the stream is obtained via {@link URL#openStream()}.
 * Subclasses reusing the archive logic outside of URL handling typically
 * override this method.
 *
 * @param nestedURL
 *            the nested URL for which a stream is required.
 * @return the open stream of the nested URL.
 * @throws IOException
 *             if the string is not a valid URL or the stream cannot be
 *             opened.
 */
protected InputStream createInputStream(String nestedURL)
        throws IOException {
    URL delegate = new URL(nestedURL);
    return delegate.openStream();
}
/**
* Creates the output stream for the URL. This delegates to the private
* two-argument variant with <code>delete</code> set to false and a time
* stamp of -1, i.e., the stream is for writing the content of the
* addressed entry.
*
* @return the output stream for the URL.
* @throws IOException
* if the underlying stream cannot be created.
*/
@Override
public OutputStream getOutputStream() throws IOException {
return getOutputStream(false, -1);
}
/**
 * Deletes the archive entry addressed by the URL. An output stream is
 * requested in delete mode and closed right away without writing to it.
 *
 * @throws IOException
 *             if the stream cannot be created or closed.
 */
public void delete() throws IOException {
    OutputStream deletingStream = getOutputStream(true, -1);
    deletingStream.close();
}
/**
 * Sets the time stamp of the archive entry addressed by the URL. An output
 * stream is requested with the given time stamp and closed right away
 * without writing to it.
 *
 * @param timeStamp
 *            the time to record for the entry; it is passed on unchanged.
 * @throws IOException
 *             if the stream cannot be created or closed.
 */
public void setTimeStamp(long timeStamp) throws IOException {
    OutputStream stampingStream = getOutputStream(false, timeStamp);
    stampingStream.close();
}
@SuppressWarnings("resource")
private OutputStream getOutputStream(boolean delete, long timeStamp)
throws IOException {
// Create the delegate URL
//
final String nestedURL = getNestedURL();
// Create a temporary file where the existing contents of the archive
// can be written
// before the new contents are added.
//
final File tempFile = File.createTempFile("Archive", "zip");
// Record the input and output streams for closing in case of failure so
// that handles are not left open.
//
InputStream sourceInputStream = null;
OutputStream tempOutputStream = null;
try {
// Create the output stream to the temporary file and the input
// stream for the delegate URL.
//
tempOutputStream = new FileOutputStream(tempFile);
try {
sourceInputStream = createInputStream(nestedURL);
} catch (IOException exception) {
// Continue processing if the file doesn't exist so that we try
// create a new empty one.
}
// Record them as generic streams to record state during the loop
// that emulates recursion.
//
OutputStream outputStream = tempOutputStream;
InputStream inputStream = sourceInputStream;
// The cutoff point to the next archive.
//
int archiveSeparator = urlString.indexOf(nestedURL)
+ nestedURL.length();
int nextArchiveSeparator = urlString.indexOf("!/",
archiveSeparator + 2);
// The most deeply nested output stream that will be returned
// wrapped as the result.
//
ZipOutputStream zipOutputStream;
// A buffer for transferring archive contents.
//
final byte[] bytes = new byte[4096];
// We expect there to be at least one archive path.
//
ZipEntry outputZipEntry;
boolean found = false;
for (;;) {
// The name that will be used as the archive entry.
//
String entry = URIs.decode(nextArchiveSeparator < 0 ? urlString
.substring(archiveSeparator + 2) : urlString.substring(
archiveSeparator + 2, nextArchiveSeparator));
// Wrap the current result as a zip stream, and record it for
// loop-based recursion.
//
zipOutputStream = null;
// Wrap the current input as a zip stream, and record it for
// loop-based recursion.
//
ZipInputStream zipInputStream = inputStream == null ? null
: new ZipInputStream(inputStream);
inputStream = zipInputStream;
// Loop over the entries in the zip stream.
//
while (zipInputStream != null
&& zipInputStream.available() >= 0) {
// If this entry isn't the end marker
// and isn't the matching one that we are replacing...
//
ZipEntry zipEntry = zipInputStream.getNextEntry();
if (zipEntry == null) {
break;
} else {
boolean match = entry.equals(zipEntry.getName());
if (!found) {
found = match && nextArchiveSeparator < 0;
}
if (timeStamp != -1 || !match) {
if (zipOutputStream == null) {
zipOutputStream = new ZipOutputStream(
outputStream);
outputStream = zipOutputStream;
}
// Transfer the entry and its contents.
//
if (timeStamp != -1 && match
&& nextArchiveSeparator < 0) {
zipEntry.setTime(timeStamp);
}
zipOutputStream.putNextEntry(zipEntry);
for (int size; (size = zipInputStream.read(bytes,
0, bytes.length)) > -1;) {
zipOutputStream.write(bytes, 0, size);
}
}
}
}
// Find the next archive path and continue "recursively" if
// there is one.
//
archiveSeparator = nextArchiveSeparator;
nextArchiveSeparator = urlString.indexOf("!/",
archiveSeparator + 2);
if ((delete || timeStamp != -1) && archiveSeparator < 0) {
if (!found) {
throw new IOException("Archive entry not found "
+ urlString);
}
// Create no entry since we are deleting and return
// immediately.
//
outputZipEntry = null;
break;
} else {
// Create a new or replaced entry and continue processing
// the remaining archives.
//
outputZipEntry = new ZipEntry(entry);
if (zipOutputStream == null) {
zipOutputStream = new ZipOutputStream(outputStream);
outputStream = zipOutputStream;
}
zipOutputStream.putNextEntry(outputZipEntry);
if (archiveSeparator > 0) {
continue;
} else {
break;
}
}
}
// Ensure that it won't be closed in the finally block.
//
tempOutputStream = null;
// Wrap the deepest result so that on close, the results are finally
// transferred.
//
final boolean deleteRequired = sourceInputStream != null;
FilterOutputStream result = new FilterOutputStream(
zipOutputStream == null ? outputStream : zipOutputStream) {
protected boolean isClosed;
@Override
public void close() throws IOException {
// Make sure we close only once.
//
if (!isClosed) {
isClosed = true;
// Close for real so that the temporary file is ready to
// be read.
//
super.close();
boolean useRenameTo = nestedURL.startsWith("file:");
// If the delegate URI can be handled as a file,
// we'll hope that renaming it will be really efficient.
//
if (useRenameTo) {
File targetFile = new File(URIs.decode(nestedURL
.substring(5)));
if (deleteRequired && !targetFile.delete()) {
throw new IOException("cannot delete "
+ targetFile.getPath());
} else if (!tempFile.renameTo(targetFile)) {
useRenameTo = false;
}
}
if (!useRenameTo) {
// Try to transfer it by reading the contents of the
// temporary file
// and writing them to the output stream of the
// delegate.
//
InputStream inputStream = null;
OutputStream outputStream = null;
try {
inputStream = new FileInputStream(tempFile);
outputStream = createOutputStream(nestedURL);
for (int size; (size = inputStream.read(bytes,
0, bytes.length)) > -1;) {
outputStream.write(bytes, 0, size);
}
} finally {
// Make sure they are closed no matter what bad
// thing happens.
//
if (inputStream != null) {
inputStream.close();
}
if (outputStream != null) {
outputStream.close();
}
}
}
}
}
};
return outputZipEntry == null ? result : yield(outputZipEntry,
result);
} finally {
// Close in case of failure to complete.
//
if (tempOutputStream != null) {
tempOutputStream.close();
}
// Close if we created this.
//
if (sourceInputStream != null) {
sourceInputStream.close();
}
}
}
/**
* Hook through which the output stream for a newly started archive entry
* is passed before it is returned to the caller. This default
* implementation returns the given stream unchanged; subclasses may
* override it to wrap or replace the stream.
*
* @param zipEntry
* the archive entry that has been started for the content to be
* written.
* @param outputStream
* the stream that has been prepared for the URL.
* @return the stream to hand to the caller; here the same
* <code>outputStream</code>.
* @throws IOException
* never thrown by this implementation; declared for overriding
* implementations.
*/
protected OutputStream yield(ZipEntry zipEntry, OutputStream outputStream)
throws IOException {
return outputStream;
}
/**
 * Opens a stream for writing to the nested URL. A connection is obtained
 * via {@link URL#openConnection()}, switched to output mode, and asked for
 * its output stream. Subclasses reusing the archive logic outside of URL
 * handling typically override this method.
 *
 * @param nestedURL
 *            the nested URL for which a stream is required.
 * @return the open output stream of the nested URL.
 * @throws IOException
 *             if the string is not a valid URL or the connection cannot
 *             provide an output stream.
 */
protected OutputStream createOutputStream(String nestedURL)
        throws IOException {
    URLConnection delegate = new URL(nestedURL).openConnection();
    delegate.setDoOutput(true);
    return delegate.getOutputStream();
}
}