package org.gjt.cuspy;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.OutputStreamWriter;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.OutputStream;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CharsetEncoder;
import java.util.ArrayList;
import java.util.Map;
import java.util.HashMap;
import java.net.URL;
import java.util.jar.Attributes;
import java.util.jar.JarEntry;
import java.util.jar.JarInputStream;
import java.util.jar.Manifest;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;
import javax.script.ScriptEngine;
import javax.script.ScriptEngineManager;
import javax.script.ScriptException;
/**
* Distribute your work as a self-extracting jar file by including one file,
* JarX.class, that also safely converts text files to the receiver's encoding
* and newline conventions, and adds less than 7 kB to your jar.
*<P>
* A self-extracting file is handy if your recipient might have a
* Java runtime environment but not the jar tool.
* The text conversion offered by JarX is useful if your distribution will
* include text files, source, documentation, scripts, etc., and your recipients
* have platforms with different newline conventions.
*<H3>Text conversion background</H3>
* There are two issues in the cross-platform delivery of text files.
*<OL><LI>Different platforms indicate the end of a line differently.
* The UNIX convention uses the single character LINE FEED; the (old) Macintosh
* used only the CARRIAGE RETURN character, and DOS/Windows systems require
* every line to end with a CARRIAGE RETURN followed by a LINE FEED.
* If some conversion isn't done, a Windows file appears to have garbage
* characters at the ends of lines if moved to UNIX, or the beginnings of lines
* if moved to a Mac; UNIX and Mac files moved to Windows, or Mac files moved
* to UNIX, appear to be squished into one insanely long line.
* These effects can complicate viewing and editing the files, and interfere
* with automated processes like diff or version control.
*<LI>Different platforms may use different default character encodings.
* Ideally, text files within a jar should be extracted into the local encoding.
*</OL><P>
* It's important to apply such transformations <EM>only</EM> to the files
* within the archive that are actually <EM>known</EM> to contain text.
* Passing binary data or class files through character and newline
* transformations will corrupt them.
*<H4>The ZIP approach and why it loses</H4>
* The popular zip format on which jar is based already has a provision for
* newline (but not character set) conversion. Each entry includes a text/binary
* bit, and the unzip program applies newline conversion while extracting, but
* only to the files flagged as text.
*<P>
* One problem, though not the fatal one, with this scheme is that there is no
* single convention for newlines inside the zip file. Instead, files are
* stored just as they are found on the source system, and a code indicating the
* source operating system is stored in the archive. The receiving unzip
* program must interpret the code and know what newline convention that
* operating system used, in order to convert newlines appropriately.
*<P>
* The fatal flaw, however, has to do with the way the text/binary
* bit gets set in the first place. While building the archive, the common zip
* programs look at statistical properties of byte frequencies in the input,
* and set the text bit for any entry that looks like it might be text! If a
* binary file happens to contain an unlucky sequence of bytes, it will be
* flagged as text and then silently corrupted by any unzip program that honors
* the text bit. That can happen, and has happened, to class files in zip
* archives if the recipient uses unzip -a, and causes significant misery if
* the package is widely distributed.
*<H4>A better way</H4>
* Even though the jar format is based on zip, it would be a mistake to make jar
* tools that rely on the zip text/binary bit, because common
* practice has made that bit unreliable. What's needed is a standard way for
* the developer to explicitly indicate the processing needed for each entry
* in the jar. Also, a single representation should be adopted for newlines
* in text files inside a jar, so an extracting program only needs to convert
* from that representation to the local one, and does not need to concern
* itself with details of the system where the jar was created.
*<P>
* As of JDK 1.3, Sun has extended the
*<A
HREF="http://java.sun.com/products/jdk/1.3/docs/guide/jar/jar.html#Per-Entry%20Attributes">
*Jar File Specification</A> to allow a <CODE>Content-Type</CODE> in the
* Manifest for each jar entry. The value of <CODE>Content-Type</CODE> is a
*<A HREF="http://www.isi.edu/in-notes/iana/assignments/media-types/media-types">MIME
* type</A>, and with this a developer can specify exactly which entries in a
* jar should be treated as text. The question of a standard representation
* for newlines inside the jar is settled, because
* <A HREF="ftp://ftp.isi.edu/in-notes/rfc2046.txt">[RFC2046 section 4.1.1]</A>
* establishes a canonical line break representation for all subtypes of the
* <CODE>text</CODE> MIME type. Therefore, correct translation of line breaks from any
* platform to any platform can be achieved if a jar-building program just
* converts from its local convention to the canonical CRLF form, and a jar
* extraction program just converts the canonical to its own local form. Neither
* program needs to know anything about the other environment.
* Finally, the <CODE>charset</CODE> parameter of the <CODE>text</CODE> type
* allows explicit specification of the character encoding used in a jar entry,
* and the extracting program can automatically convert into the encoding used
* on the local system. (But see <STRONG>Call to action</STRONG> below.)
*<H3>What JarX Does</H3>
* <CODE>Content-Type</CODE> entries in a Manifest were introduced in Java 1.3
* but are compatible with earlier jar specifications; a jar file containing
* such entries can be processed without any trouble by any jar tool compliant
* with the old or new standard. However, there is not yet a full jar tool
* available that will honor the content types and do automatic transformation
* of text entries. To fill the need until that functionality is added to the
* widely-available jar tools, JarX is available now.
*<P>
* JarX.Build produces a jar, working from a manifest file prepared by the
* developer. Entries with any <CODE>text</CODE> type will be translated from
* the local encoding into the specified <CODE>charset</CODE> if given, and
* entries with the specific type <CODE>text/plain</CODE> will have their line
* endings converted to the CRLF canonical form. Line endings are left alone
* for all other subtypes of <CODE>text</CODE>, but this decision is open to
* comment.
*<P>
* The file produced by JarX.Build is a fully compliant jar and can be unpacked
* by any jar or unzip tool, but current tools will not automatically convert
* the text files to the local conventions. By including the single class file
* <CODE>JarX.class</CODE> in the jar, a developer produces a self-extracting
* archive that can be executed to unpack itself on any Java 1.6 or later
* virtual machine, performing all automatic conversions and requiring no jar
* tool at all.
*<H3>Building a Jar</H3>
* To build a jar file, first prepare the manifest, using any text editor or,
* more likely, a script. Include a <CODE>Name:</CODE> entry for every file
* to be included in the jar. JarX.Build archives only the files named in
* the manifest. Be sure to include <CODE>Manifest-Version: 1.0</CODE> as
* the first line of the manifest; JarX.Build does not do it for you. To make
* the jar self-extracting, make the next line<BR>
* <CODE>Main-Class: org.gjt.cuspy.JarX</CODE><BR> and be sure to include a
* <CODE>Name:</CODE> entry for <CODE>org/gjt/cuspy/JarX.class</CODE>.
*<P>
* Add an appropriate <CODE>Content-Type:</CODE> line after the
* <CODE>Name:</CODE> line for every entry that needs one. JarX itself only
* distinguishes the <CODE>text</CODE> types from nontext (everything else),
* and treats a missing <CODE>Content-Type:</CODE> as nontext, so for purposes
* of JarX you only need to add content types for text files. For other
* purposes you may wish to include the types of other entries as well.
* In the simplest case, just omit content types for your non-text files,
* and add <CODE>Content-Type: text/plain; charset=UTF-8</CODE> for files that
* you want auto-converted. Then give the command<BR>
* <CODE>java org.gjt.cuspy.JarX$Build foo.jar manifest</CODE><BR> if
* <CODE>manifest</CODE> is the name of your prepared manifest file and
* <CODE>foo.jar</CODE> names the jar you want to create.
* The order of files in the jar will be the order of their names in the
* manifest.
*<H4>Special manifest attributes</H4>
* For 2016, JarX now recognizes some special manifest attributes:
* <DL>
* <DT>_JarX_CharsetInArchive</DT>
* <DD>As a per-entry attribute, identifies the character set of the associated
* text member as stored in the archive. This is entirely equivalent to the
* earlier method using {@code ;charset=} on the Content-Type attribute,
* which JarX still supports, but has not been widely adopted. As a main
* attribute, sets a default for any text members without a per-entry value.
* </DD>
* <DT>_JarX_CharsetWhenUnpacked</DT>
* <DD>As a per-entry attribute, identifies the character set of the associated
* text member when not in the archive. At Build time, the member will be
* transcoded from this charset (instead of the platform's default) to the
* specified InArchive charset, and, on extraction, will be transcoded back
* to this charset regardless of the platform's default encoding. This
* attribute can be used for files conforming to specifications that define
* a fixed encoding. In other cases, omitting this attribute allows the
* member to be extracted into the receiving platform's default charset.
* As a main attribute, sets a default for text members without a per-entry
* value.</DD>
* <DT>_JarX_Permissions</DT>
* <DD>As a per-entry attribute, declares permissions to apply to the
* extracted file. (At present, not applied to directories.) Only the
* Java SE 6 {@link java.io.File} permissions are supported, a small subset
* of what most platforms support. A comma-separated list of
* <em>usage</em>{@code =}<em>bywhom</em>, where <em>usage</em> can be
* {@code read}, {@code write}, or {@code execute} and <em>bywhom</em> can be
* {@code none}, {@code owner}, or {@code all}. As a main attribute, sets a
* default for members without a per-entry attribute. For any <em>usage</em>
* that is left unspecified, no {@link java.io.File File} method will be
* called to change that permission, so the system's defaults will apply.
* </DD>
* <DT>_JarX_PathResolver</DT>
* <DD>Only recognized as a main attribute, this specifies a script that JarX
* will invoke for every archive member, with the following bindings in scope:
* <DL>
* <DT>properties</DT><DD>The Java system Properties object.</DD>
* <DT>storedPath</DT><DD>The full pathname of the member, exactly as
* stored in the archive.</DD>
* <DT>platformPath</DT><DD>The full pathname after only replacing the
* {@code /} separator character with the platform's {@code file.separator}
* if different.
* </DD>
* <DT>computedPath</DT><DD>Initially the same as {@code platformPath}.
* If the script stores a new value in {@code computedPath}, the member
* will be extracted to that full path.</DD>
* </DL>
* The script is given as the value of this attribute, using the same
* RFC822-ish lexical conventions the jar spec says it was "inspired by".
* The value must begin with a MIME type (two atoms separated by a slash,
* as in {@code application/javascript}), followed by at least one
* {@code QUOTEDSTRING}. RFC822 uses the double-quote for this purpose, and
* backslash to escape it when needed, which also means you must double any
* backslash intended for the script. Additional {@code QUOTEDSTRING}s simply
* append to the script. The RFC822 line-continuation rule can be exploited
* by supplying the script as multiple quoted strings, one per line, each
* indented by a space. The strings are appended with nothing in between
* (so, the continuation newlines do not become newlines in the script), but
* a {@code /} can appear between any two quoted strings to insert an
* explicit newline in the script. In addition to whatever comment syntax is
* allowed in the scripting language, RFC822 comments (marked by parentheses,
* and nestable) are allowed outside of the quoted strings.
* </DD>
* </DL>
*<H3>Extracting a jar</H3>
* The command <CODE>java -jar foo.jar</CODE> is all it takes
* to extract a jar. The <CODE>Main-Class</CODE> entry in the manifest
* identifies the entry point of JarX so it does not need to be specified.
*<P>
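* JarX extracts every entry in the jar except its own class file. Entries
* whose manifest <CODE>Content-Type</CODE> is a <CODE>text</CODE> type are
* converted as described above; all other entries are copied byte for byte.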
*<H3>Call to action</H3>
* At the moment, Sun's Jar File Specification contains a mistake in the
* description of a content type that could lead to implementations
* that reject valid content types. Squash this bug before it bites:
* log on to the
*<A HREF="http://developer.java.sun.com/developer/">Java Developer
* Connection</A> (it's free) and cast one, two, or all three of your Bug Votes
* for
*<A HREF="http://developer.java.sun.com/developer/bugParade/bugs/4310708.html">
*Bug #4310708</A>.
*<H3>Miscellany</H3>
* This class is a little sloppy and relatively slow, especially the Build side
* when converting plain text files. The idea for JarX is a natural outgrowth
* of the Java 1.3 manifest standard and I have suggested that the functionality
* of JarX be added into the widely available jar tools. If Sun takes the
* suggestion then the functionality of JarX will soon be provided by nice
* fast optimized tools and it won't be necessary to spend a lot of time
* polishing JarX.
*<P>
* Error handling is roughly nonexistent. JarX is careful to avoid silent
* corruption of data, even verifying that all character encoding calls are
* successful, but makes no attempt to be graceful about errors or surprises.
* If something doesn't work the likely result is a one line message and abrupt
* exit, or an uncaught exception and stack trace.
*<P>
* The coding style is a little contrived just to arrange it so JarX.class is
* the only file needed in the jar to make it self-extracting. In particular
* the JarX class is also written to serve as the class of tokens returned by
* the structured-field-body lexer, to avoid introducing a second class. Weird,
* perhaps, but harmless weird.
*@author <A HREF="mailto:chap@gjt.org">Chapman Flack</A>
*@version $Id$
*/
public class JarX {
/**How to treat the entry being processed: bytes, characters, lines.
* Used only in the JarX instance created by main(). Set by classify().
* Only the exact String instances BYTES, CHARACTERS, LINES are to be used,
* because the extraction code tests this field by reference ({@code ==})
* rather than with {@code equals}.
*/
protected String treatment;
/**Treatment value: copy the entry byte for byte, with no conversion.*/
protected static final String BYTES = "bytes";
/**Treatment value: transcode the entry but leave line endings alone.*/
protected static final String CHARACTERS = "characters";
/**Treatment value: transcode the entry and convert its line endings.*/
protected static final String LINES = "lines";
/**Charset (in archive) of the entry being processed.
* Used only in the JarX instance created by main(). Set by classify().
*/
protected Charset archiveCharset;
/**Charset when unpacked of the entry being processed.
* Used only in the JarX instance created by main(). Set by classify().
*/
protected Charset unpackedCharset;
/**Read permission to be set on the file.
* Only the final Strings NONE, OWNER, or ALL are to be used, or null, in
* which case no explicit setting is made and the OS defaults apply.
* Compared by reference, like {@link #treatment}.
*/
protected String readPermission;
/**Write permission to be set on the file.
* Only the final Strings NONE, OWNER, or ALL are to be used, or null, in
* which case no explicit setting is made and the OS defaults apply.
* Compared by reference, like {@link #treatment}.
*/
protected String writePermission;
/**Execute permission to be set on the file.
* Only the final Strings NONE, OWNER, or ALL are to be used, or null, in
* which case no explicit setting is made and the OS defaults apply.
* Compared by reference, like {@link #treatment}.
*/
protected String executePermission;
/**Permission value: the usage is denied to everyone.*/
protected static final String NONE = "none";
/**Permission value: the usage is granted to the file's owner only.*/
protected static final String OWNER = "owner";
/**Permission value: the usage is granted to everyone.*/
protected static final String ALL = "all";
/**As for treatment, but set from main attributes (or BYTES if not present).*/
protected String defaultTreatment = BYTES;
/**As for archiveCharset, but set from main attributes (default UTF-8).*/
protected Charset defaultArchiveCharset = Charset.forName( "UTF-8");
/**As for unpackedCharset, but set from main attributes or platform default.*/
protected Charset defaultUnpackedCharset = Charset.defaultCharset();
/**As for readPermission but set from main attributes, null if not present.*/
protected String defaultReadPermission;
/**As for writePermission but set from main attributes, null if not present.*/
protected String defaultWritePermission;
/**As for executePermission but set from main attributes, null if not present.
*/
protected String defaultExecutePermission;
/**Script engine to run the name resolver script, if any.
* Assigned by setDefaults() only when the main attributes supply a
* resolver script; otherwise left null.
*/
protected ScriptEngine resolverEngine;
/**The name resolver script, if any.
* A non-null value is what makes extract() consult the script engine.
*/
protected String resolverScript;
/**Attribute name for specifying the in-archive charset.
 * The Java powers that be didn't go for
 *<A HREF="http://developer.java.sun.com/developer/bugParade/bugs/4310708.html">
 *Bug #4310708</A> so there needs to be a dedicated manifest key for this
 * (though JarX will still honor ;charset= on the Content-Type too).
 *<P>
 * Declared {@code static}: JarX instances double as lexer tokens, so an
 * instance field here would allocate a fresh name object for every token.
 */
public static final Attributes.Name ARCHIVE_CHARSET =
    new Attributes.Name( "_JarX_CharsetInArchive");
/**Attribute name for specifying the when-unpacked charset.
 * This was not in the original JarX; the platform default was always used,
 * and still is if this attribute is not present.
 */
public static final Attributes.Name UNPACKED_CHARSET =
    new Attributes.Name( "_JarX_CharsetWhenUnpacked");
/**Permissions (only as supported in java.io.File for SE 6)
 * spec *(, spec) where spec is action=whom, action is read, write, or
 * execute, and whom is none, owner, or all.
 */
public static final Attributes.Name PERMISSIONS =
    new Attributes.Name( "_JarX_Permissions");
/** Main attribute to specify a JSR223 script to control extracted names. */
public static final Attributes.Name PATHRESOLVER =
    new Attributes.Name( "_JarX_PathResolver");
/**Main attributes saved from the manifest (which must be seen early).
* Assigned by setDefaults().
*/
protected Attributes mainAttributes;
/**Token type, when JarX objects are used to return content type tokens.
* Holds one of ATOM, COMMENT, DOMAINLITERAL, QUOTEDSTRING, TSPECIAL.
*/
public short type;
/**Token text when JarX objects are used to return content type tokens*/
public String value;
/**Token types from the structured field body lexer defined in
*<A HREF="ftp://ftp.isi.edu/in-notes/rfc822.txt">RFC822</A>
* as modified in
*<A HREF="ftp://ftp.isi.edu/in-notes/rfc2045.txt">RFC2045</A>.
* Also state numbers for the automaton in
* {@link #structuredFieldBody(String,int) structuredFieldBody}.
* This description applies to all six constants that follow, not only ATOM.
*/
public static final short ATOM = 5;
/* Comment token; structuredFieldBody leaves these out of what it returns. */
public static final short COMMENT = 4;
/* Domain-literal token (presumably the RFC822 bracketed form; the lexer
* body is needed to confirm).
*/
public static final short DOMAINLITERAL = 3;
/* Quoted-string token; value holds the text without its delimiters. */
public static final short QUOTEDSTRING = 2;
/* Single special character, such as the "/", "=", ";" and "," that
* the attribute parsers in this class test for.
*/
public static final short TSPECIAL = 1;
/* Initial lexer state; not a token type, hence not public. */
static final short START = 0;
/**Test whether this token's type is among the given candidates.
 * @param type the token types to accept
 * @return true if this token's type equals any element of {@code type};
 * false otherwise, including when no candidates are given
 */
public boolean is( short... type) {
    final short mine = this.type;
    for ( int k = 0; k < type.length; ++k ) {
        if ( type[k] == mine ) {
            return true;
        }
    }
    return false;
}
/**Test whether this token has one of the given types and exactly the
 * given text.
 * @param value text the token must carry (compared case-sensitively)
 * @param type the token types to accept
 * @return true only if both the type and the text match
 */
public boolean holds( String value, short... type) {
    // Check the type first so the text is compared only for candidates.
    if ( ! is( type) ) {
        return false;
    }
    return value.equals( this.value);
}
/**Test whether this token has one of the given types and the given text,
 * disregarding letter case.
 * @param value text the token must carry (compared case-insensitively)
 * @param type the token types to accept
 * @return true only if both the type and the text match
 */
public boolean holdsIgnoreCase( String value, short... type) {
    // Check the type first so the text is compared only for candidates.
    if ( ! is( type) ) {
        return false;
    }
    return value.equalsIgnoreCase( this.value);
}
/**Name of the JarX class file as stored in the jar.
* extract() compares each entry name to this value so that JarX does not
* unpack its own class file.
*/
public static final String me
= JarX.class.getName().replace('.', '/') + ".class";
/**Name of the manifest file as stored in the jar*/
public static final String manifestName = "META-INF/MANIFEST.MF";
/**The (fixed) encoding used for manifest content*/
public static final String manifestCode = "UTF-8";
/**Entry point for self-extraction.
 * No arguments are accepted: the jar to unpack is the one this class was
 * loaded from. If any argument is given, a usage message is printed and
 * the process exits with status 1.
 *@param args argument list, expected to be empty
 *@throws Exception if anything doesn't work, punt
 */
public static void main( String[] args) throws Exception {
    JarX extractor = new JarX();

    if ( 0 != args.length ) {
        System.err.println( "usage: java -jar filename.jar");
        System.exit( 1);
    }

    extractor.extract();
}
/**Find the jar I was loaded from and extract all entries except my own
 * class file.
 *<P>
 * The manifest's main attributes are passed to
 * {@link #setDefaults(Attributes) setDefaults} when the first entry is
 * seen, before any entry is extracted. The jar stream is closed on every
 * exit path, including when extraction of an entry fails.
 *@throws Exception if anything doesn't work, punt
 */
public void extract() throws Exception {
    URL jarURL =
        this.getClass().getProtectionDomain().getCodeSource().getLocation();

    // Tracks the outermost stream opened so far, so that the finally block
    // releases the right one even if wrapping it fails.
    InputStream toClose = jarURL.openStream();
    try {
        JarInputStream jis = new JarInputStream( toClose);
        toClose = jis; // closing jis also closes the underlying stream

        Manifest mf = null;
        for ( JarEntry je;; ) {
            je = jis.getNextJarEntry();
            if ( je == null )
                break;
            if ( null == mf ) {
                mf = jis.getManifest();
                if ( null != mf )
                    setDefaults( mf.getMainAttributes());
            }
            if ( ! je.getName().equals( me) )
                extract( je, jis);
            jis.closeEntry();
        }
    }
    finally {
        toClose.close();
    }
}
/**Derive the extraction defaults from the manifest's main attributes.
 * The attributes are run through
 * {@link #classify(Attributes,boolean) classify} and the outcome is saved
 * in the {@code default...} fields. If a name resolver attribute is
 * present, its script text is assembled and a script engine for its MIME
 * type is loaded; a malformed attribute or a missing engine ends the
 * process with status 1.
 * @param mainAttributes as obtained from the manifest
 */
public void setDefaults( Attributes mainAttributes) {
    this.mainAttributes = mainAttributes;

    classify( mainAttributes, false);
    defaultTreatment = treatment;
    defaultArchiveCharset = archiveCharset;
    defaultUnpackedCharset = unpackedCharset;
    defaultReadPermission = readPermission;
    defaultWritePermission = writePermission;
    defaultExecutePermission = executePermission;

    String spec =
        null == mainAttributes ? null : mainAttributes.getValue( PATHRESOLVER);
    if ( null == spec ) {
        return;
    }

    // Required shape: ATOM "/" ATOM QUOTEDSTRING, then more strings or "/".
    JarX[] toks = structuredFieldBody( spec, 0);
    boolean headOk = toks.length >= 4
        && toks[0].is( ATOM)
        && toks[1].holds("/", TSPECIAL)
        && toks[2].is( ATOM)
        && toks[3].is( QUOTEDSTRING);
    if ( ! headOk ) {
        System.err.printf( "Malformed name resolver attribute: %s\n", spec);
        System.exit( 1);
    }

    String mimetype = toks[0].value + "/" + toks[2].value;
    StringBuilder script = new StringBuilder( toks[3].value);

    boolean tailOk = true;
    for ( int k = 4; k < toks.length; ++k ) {
        JarX tok = toks[k];
        if ( tok.holds( "/", TSPECIAL) ) {
            script.append( '\n'); // a bare slash stands for a newline
        }
        else if ( tok.is( QUOTEDSTRING) ) {
            script.append( tok.value);
        }
        else {
            tailOk = false;
            break;
        }
    }
    if ( ! tailOk ) {
        System.err.printf( "Malformed name resolver attribute: %s\n", spec);
        System.exit( 1);
    }

    resolverEngine = new ScriptEngineManager().getEngineByMimeType( mimetype);
    if ( null == resolverEngine ) {
        System.err.printf( "No script engine found for %s\n", mimetype);
        System.exit( 1);
    }
    resolverEngine.put( "properties", System.getProperties());
    resolverScript = script.toString();
}
/**Set instance variables for text/binary and permissions treatment
 * according to the passed Attributes.
 *<P>
 * Every field is first reset to its default, then overridden by whatever
 * the attributes specify. A Content-Type of type {@code text} selects
 * LINES (for {@code text/plain}) or CHARACTERS (any other subtype). Any
 * other non-empty Content-Type selects BYTES, even when the default
 * treatment is a text one, so that an entry explicitly declared non-text
 * is never transcoded. A Content-Type with no tokens is ignored. A
 * malformed permissions attribute ends the process with status 1.
 * @param atts Usually a per-entry attribute set, but {@code classify} is
 * also called by {@code setDefaults} to parse the main attributes. May be
 * null, in which case only the defaults are applied.
 * @param lazy In the usual case, as soon as an entry is classified as
 * non-text, {@code classify} can return without looking for charset
 * information. When called by {@code setDefaults}, however, laziness is not
 * appropriate.
 */
public void classify( Attributes atts, boolean lazy) {
    treatment = defaultTreatment;
    archiveCharset = defaultArchiveCharset;
    unpackedCharset = defaultUnpackedCharset;
    readPermission = defaultReadPermission;
    writePermission = defaultWritePermission;
    executePermission = defaultExecutePermission;

    if ( null == atts )
        return;

    String v = atts.getValue( PERMISSIONS);
    if ( null != v ) {
        String r = null;
        String w = null;
        String x = null;
        JarX[] toks = structuredFieldBody( v, 0);
        int i = 0;
        // Each spec is three tokens: usage "=" bywhom.
        while ( i + 2 < toks.length ) {
            if ( ! toks[i].is( ATOM) || ! toks[i+1].holds( "=", TSPECIAL) )
                break;
            if ( ! toks[i+2].is( ATOM) )
                break;
            String p = toks[i].value;
            String noa = toks[i+2].value;
            // Map onto the canonical instances; later code compares with ==.
            if ( NONE.equalsIgnoreCase( noa) )
                noa = NONE;
            else if ( OWNER.equalsIgnoreCase( noa) )
                noa = OWNER;
            else if ( ALL.equalsIgnoreCase( noa) )
                noa = ALL;
            else
                break;
            // Naming the same usage twice is treated as malformed.
            if ( "read".equalsIgnoreCase( p) && null == r )
                r = noa;
            else if ( "write".equalsIgnoreCase( p) && null == w )
                w = noa;
            else if ( "execute".equalsIgnoreCase( p) && null == x )
                x = noa;
            else
                break;
            i += 3;
            // Skip a comma only when a complete spec can follow it.
            if ( i+3 < toks.length && toks[i].holds( ",", TSPECIAL) )
                ++i;
        }
        if ( i < toks.length ) {
            System.err.printf( "Malformed permissions attribute: %s\n", v);
            System.exit( 1);
        }
        if ( null != r )
            readPermission = r;
        if ( null != w )
            writePermission = w;
        if ( null != x )
            executePermission = x;
    }

    boolean archiveCharsetFound = false;
    v = atts.getValue( Attributes.Name.CONTENT_TYPE);
    if ( null != v ) {
        JarX[] type = structuredFieldBody( v, 0);
        // The length test keeps a blank or truncated value from running off
        // the end of the token array.
        if ( type.length >= 3
            && type[0].holdsIgnoreCase( "text", ATOM)
            && type[1].holds( "/", TSPECIAL) ) {
            treatment =
                type[2].holdsIgnoreCase( "plain", ATOM)? LINES : CHARACTERS;
            archiveCharsetFound = archiveCharsetFromType( type);
        }
        else if ( type.length > 0 ) {
            // Explicitly not text: never let a text default transcode it.
            treatment = BYTES;
        }
    }

    if ( BYTES == treatment && lazy )
        return;

    // A charset given on the Content-Type takes precedence over the
    // dedicated attribute.
    if ( ! archiveCharsetFound ) {
        v = atts.getValue( ARCHIVE_CHARSET);
        if ( null != v )
            archiveCharset = Charset.forName( v);
    }

    v = atts.getValue( UNPACKED_CHARSET);
    if ( null != v )
        unpackedCharset = Charset.forName( v);
}
/**Parse a Content-Type for any {@code charset} parameter.
 * The first three tokens (type, slash, subtype) are assumed to have been
 * checked by the caller. What follows must be a sequence of parameters,
 * each made of four tokens: {@code ;} name {@code =} value, where value is
 * an atom or a quoted string. Scanning stops at the first {@code charset}
 * parameter, whose value is stored in {@code archiveCharset}.
 *<P>
 * A parameter list that is truncated or otherwise malformed ends the
 * process with status 1 after the message
 * "Malformed Content-Type specification!".
 * @param type tokenized Content-Type value
 * @return true if the Content-Type specified a charset
 */
protected boolean archiveCharsetFromType( JarX[] type) {
    String charset = null;
    int i = 3;
    while ( i < type.length ) {
        // Require all four tokens of the parameter to be present before
        // touching them; leaving i short of the end reports it as
        // malformed below.
        if ( i + 3 >= type.length
            || ! type[i].holds( ";", TSPECIAL)
            || ! type[i+2].holds( "=", TSPECIAL)
            || ! type[i+3].is( ATOM, QUOTEDSTRING) )
            break;
        if ( type[i+1].holdsIgnoreCase( "charset", ATOM) ) {
            charset = type[i+3].value;
            break;
        }
        i += 4;
    }
    if ( null != charset ) {
        archiveCharset = Charset.forName( charset);
        return true;
    }
    if ( i < type.length ) {
        System.err.println( "Malformed Content-Type specification!");
        System.exit( 1);
    }
    return false;
}
/**Extract a single entry, performing any appropriate conversion.
 *<P>
 * The destination path is the entry name with {@code /} replaced by the
 * platform separator, unless a name resolver script supplies a different
 * {@code computedPath}. Directory entries are created and nothing more.
 * File content is written to a temporary file in the destination
 * directory, given any declared permissions, and then renamed into place.
 * If anything fails before the rename succeeds, the temporary file is
 * deleted.
 *<P>
 * NOTE(review): entry names and script results are used as paths without
 * checking that they stay below the current directory.
 *@param je JarEntry for the current entry
 *@param is InputStream with the current entry content
 *@throws IOException for any problem involving I/O, including failure to
 * rename the temporary file to the destination name
 *@throws ScriptException for any problem involving the script engine
 */
public void extract( JarEntry je, InputStream is)
throws IOException, ScriptException {
    classify( je.getAttributes(), true);

    String orig = je.getName();
    String s = orig;
    if ( File.separatorChar != '/' )
        s = s.replace( '/', File.separatorChar);

    if ( null != resolverScript ) {
        resolverEngine.put( "storedPath", orig);
        resolverEngine.put( "platformPath", s);
        resolverEngine.put( "computedPath", s);
        resolverEngine.eval( resolverScript);
        s = (String)resolverEngine.get( "computedPath");
    }

    System.err.print( s + " ");

    File f = new File( s);
    if ( je.isDirectory() ) {
        if ( f.isDirectory() || f.mkdirs() )
            System.err.println();
        else
            System.err.println( "FAILED!");
        return;
    }

    File d = f.getParentFile();
    if ( null == d )
        d = new File( System.getProperty( "user.dir"));

    // createTempFile rejects prefixes shorter than three characters, so pad
    // short file names rather than fail on entries like "a" or "go".
    String prefix = f.getName();
    while ( prefix.length() < 3 )
        prefix += "_";

    File tmpf;
    try {
        tmpf = File.createTempFile( prefix, ".tmp", d);
    }
    catch ( IOException e ) {
        // Most likely the directory does not exist yet; create it and retry.
        if ( ! d.mkdirs() )
            throw e;
        tmpf = File.createTempFile( prefix, ".tmp", d);
    }

    boolean installed = false;
    try {
        OutputStream os = new FileOutputStream( tmpf);
        try {
            if ( null != readPermission ) {
                if ( ALL == readPermission )
                    tmpf.setReadable( true, false);
                else {
                    tmpf.setReadable( false, false);
                    if ( OWNER == readPermission )
                        tmpf.setReadable( true, true);
                }
            }

            if ( null != writePermission ) {
                if ( ALL == writePermission )
                    tmpf.setWritable( true, false);
                else {
                    tmpf.setWritable( false, false);
                    // Owner write stays on for now even when NONE was
                    // asked for; that case is dealt with after the copy.
                    tmpf.setWritable( true, true);
                }
            }

            shovel( is, os);
        }
        finally {
            os.close();
        }

        if ( NONE == writePermission )
            tmpf.setWritable( false, false);

        if ( null != executePermission ) {
            if ( ALL == executePermission )
                tmpf.setExecutable( true, false);
            else {
                tmpf.setExecutable( false, false);
                if ( OWNER == executePermission )
                    tmpf.setExecutable( true, true);
            }
        }

        // renameTo reports failure only through its return value.
        if ( ! tmpf.renameTo( f) )
            throw new IOException(
                "Could not rename " + tmpf + " to " + f);
        installed = true;
    }
    finally {
        if ( ! installed )
            tmpf.delete(); // do not leave a stray temporary file behind
    }
}
/**Copy everything from an input stream to an output stream.
 * The copy is made as bytes, characters, or lines according to the
 * {@code treatment} field, which
 * {@link #classify(Attributes,boolean) classify} must have set before this
 * method is called.
 *@param is source of input
 *@param os destination for output
 *@throws IOException for any problem involving I/O
 */
public void shovel( InputStream is, OutputStream os) throws IOException {
    if ( BYTES != treatment ) {
        shovelText( is, os);
        return;
    }
    shovelBytes( is, os);
}
/**Copy <EM>bytes</EM> from an input to an output stream until end.
 * The data is passed through untouched: no character encoding or newline
 * conversion applies. Neither stream is closed. When the copy is
 * complete, "as bytes" is printed on the standard error stream.
 *@param is source of input
 *@param os destination for output
 *@throws IOException for any problem involving I/O
 */
public static void shovelBytes( InputStream is, OutputStream os)
throws IOException {
    final byte[] chunk = new byte [ 1024 ];
    int count;
    while ( -1 != ( count = is.read( chunk, 0, chunk.length) ) ) {
        os.write( chunk, 0, count);
    }
    System.err.println( "as bytes");
}
/**Copy <EM>text</EM> from an input to an output stream until end.
 * Dispatches on the {@code treatment} field: LINES is copied with newline
 * conversion, anything else as unmolested characters. Either way the
 * content is transcoded between the charsets that
 * {@link #classify(Attributes,boolean) classify} selected.
 *@param is source of input
 *@param os destination of output
 *@throws IOException for any problem involving I/O
 */
public void
shovelText( InputStream is, OutputStream os)
throws IOException {
    if ( LINES != treatment ) {
        shovelChars( is, os);
        return;
    }
    shovelLines( is, os);
}
/**Copy <EM>lines</EM> of text from an input to an output stream, applying
 * character encoding conversion and translating newlines.
 * This method handles the extracting case: the input (jar) is decoded with
 * {@code archiveCharset}, the output (local file) is encoded with
 * {@code unpackedCharset}, and each line read is written followed by the
 * local line separator.
 * Overridden in
 * {@link JarX.Build#shovelLines(InputStream,OutputStream) build} to do
 * the reverse when building a jar.
 * The decoder and encoder are created directly from the charsets so that
 * unconvertible input is reported rather than silently replaced.
 * When done, a summary of the transcoding is printed on standard error.
 *@param is the source of input
 *@param os destination for output
 *@throws IOException for any problem involving I/O
 */
public void
shovelLines( InputStream is, OutputStream os)
throws IOException {
    InputStreamReader decoding =
        new InputStreamReader( is, archiveCharset.newDecoder());
    BufferedReader lineSource = new BufferedReader( decoding);
    OutputStreamWriter encoding =
        new OutputStreamWriter( os, unpackedCharset.newEncoder());
    BufferedWriter lineSink = new BufferedWriter( encoding);

    for ( String line = lineSource.readLine();
          null != line;
          line = lineSource.readLine() ) {
        lineSink.write( line);
        lineSink.newLine();
    }

    lineSink.flush();
    encoding.flush();
    System.err.printf(
        "as lines (%s)\n", describeTranscoding(decoding, encoding));
}
/**Copy <EM>characters</EM> of text from an input to an output stream,
 * applying character encoding conversion but not translating newlines.
 * This method handles the extracting case: the input (jar) is decoded with
 * {@code archiveCharset} and the output (local file) is encoded with
 * {@code unpackedCharset}.
 * Overridden in
 * {@link Build#shovelChars(InputStream,OutputStream) build} to do
 * the reverse when building a jar.
 * The decoder and encoder are created directly from the charsets so that
 * unconvertible input is reported rather than silently replaced.
 * When done, a summary of the transcoding is printed on standard error.
 *@param is the source of input
 *@param os destination for output
 *@throws IOException for any problem involving I/O
 */
public void
shovelChars( InputStream is, OutputStream os)
throws IOException {
    InputStreamReader decoding =
        new InputStreamReader( is, archiveCharset.newDecoder());
    OutputStreamWriter encoding =
        new OutputStreamWriter( os, unpackedCharset.newEncoder());

    final char[] chunk = new char [ 1024 ];
    int count;
    while ( -1 != ( count = decoding.read( chunk, 0, chunk.length) ) ) {
        encoding.write( chunk, 0, count);
    }

    encoding.flush();
    System.err.printf(
        "as characters (%s)\n", describeTranscoding(decoding, encoding));
}
/**Describe, for a progress message, the encodings in use on a reader and
 * a writer.
 *@param isr reader whose encoding name is reported as the source encoding
 *@param osw writer whose encoding name is reported as the target encoding
 *@return the encoding name alone when reader and writer report the same
 * name, otherwise the two names joined as <CODE>from -&gt; to</CODE>
 */
public String describeTranscoding(
InputStreamReader isr, OutputStreamWriter osw) {
    String from = isr.getEncoding();
    String to = osw.getEncoding();
    return from.equals( to) ? from : from + " -> " + to;
}
/**Public no-argument constructor, for an application using JarX to unpack
 * jars. It does no initialization of its own.
 */
public JarX() {
    super();
}
/**Constructor for JarX objects used as tokens returned by the lexer.
 * It simply records the two arguments in the {@code type} and
 * {@code value} fields.
 *@param t the type of this token
 *@param v the corresponding text (with delimiters removed and backslashes
 * resolved for quoted strings, domain text, and comments)
 */
protected JarX( short t, String v) {
    this.type = t;
    this.value = v;
}
/**Lexical analyzer for structured field bodies as described in
 *<A HREF="ftp://ftp.isi.edu/in-notes/rfc822.txt">RFC822</A>
 * and modified in
 *<A HREF="ftp://ftp.isi.edu/in-notes/rfc2045.txt">RFC2045</A>.
 * The body is scanned by a small state machine. Each pass through the
 * outer loop first emits the token (if any) completed by the previous pass,
 * then handles the current state. Quoted strings, domain literals and
 * comments are compacted in place in the working buffer so that their
 * delimiters and escaping backslashes do not appear in the token text.
 * Comments are processed and stored in tokens that are, at the last
 * minute, excluded from the returned token list; only two lines would need
 * to be changed to use this lexer in an application that wanted comments
 * returned.
 * A warning is printed on <CODE>System.err</CODE> if the input ends inside
 * a quoted string, domain literal, or comment.
 *@param field a header field
 *@param off offset to the start of the structured field body
 * (skip the field name and colon)
 *@return An array of {@link #JarX(short,String) tokens} with any
 * COMMENT tokens (for JarX purposes) excluded
 */
public static JarX[] structuredFieldBody( String field, int off) {
    /* work on a private copy; the delimited states rewrite it in place */
    char[] buf = new char [ field.length() - off ];
    field.getChars( off, off + buf.length, buf, 0);
    /*
     * beg..end delimit a completed token waiting to be emitted (end < beg
     * means none is pending); la is the lookahead position.
     */
    int beg = 0, end = -1, la;
    int commentDepth = 0;
    short state = START;
    short lastState = state;
    boolean bashed = false; /* previous character was an escaping backslash */
    ArrayList<JarX> v = new ArrayList<JarX>();
    dfa: for ( la = 0; la < buf.length; ) {
        /* emit the token finished on the previous pass, dropping comments */
        if ( end >= beg ) {
            if ( lastState != COMMENT )
                v.add(new JarX( lastState, new String( buf, beg, end-beg)));
            end = -1;
        }
        lastState = state;
        switch ( state ) {
            case START:
                switch ( buf[la] ) {
                    case '"': beg = ++la; state = QUOTEDSTRING; continue dfa;
                    case '[': beg = ++la; state = DOMAINLITERAL; continue dfa;
                    case '(': beg = ++la; state = COMMENT; continue dfa;
                    case '/': case '?': case '=': case ')': case '<': case '>':
                    case '@': case ',': case ';': case ':': case '\\': case ']':
                        /* la is not advanced; TSPECIAL consumes the character */
                        state = TSPECIAL; continue dfa;
                    case ' ': case '\u0009': ++la; continue dfa;
                    default: beg = la++; state = ATOM; continue dfa;
                }
            case TSPECIAL:
                /* a single-character token */
                beg = la;
                end = ++la;
                state = START;
                continue dfa;
            case QUOTEDSTRING:
                for ( end = beg; la < buf.length; ++la ) {
                    if ( bashed )
                        bashed = false;
                    else if ( buf [ la ] == '\\' ) {
                        bashed = true;
                        continue;
                    }
                    else if ( buf [ la ] == '"' ) {
                        ++la;
                        state = START;
                        continue dfa;
                    }
                    buf [ end++ ] = buf [ la ];
                }
                break dfa; /* input ended before the closing quote */
            case DOMAINLITERAL:
                for ( end = beg; la < buf.length; ++la ) {
                    if ( bashed )
                        bashed = false;
                    else if ( buf [ la ] == '\\' ) {
                        bashed = true;
                        continue;
                    }
                    else if ( buf [ la ] == ']' ) {
                        ++la;
                        state = START;
                        continue dfa;
                    }
                    buf [ end++ ] = buf [ la ];
                }
                break dfa; /* input ended before the closing bracket */
            case COMMENT:
                /* comments nest; only the outermost closing paren ends one */
                ++commentDepth;
                for ( end = beg; la < buf.length; ++la ) {
                    if ( bashed )
                        bashed = false;
                    else if ( buf [ la ] == '\\' ) {
                        bashed = true;
                        continue;
                    }
                    else if ( buf [ la ] == ')' && 0 == --commentDepth ) {
                        ++la;
                        state = START;
                        continue dfa;
                    }
                    else if ( buf [ la ] == '(' )
                        ++commentDepth;
                    buf [ end++ ] = buf [ la ];
                }
                break dfa; /* input ended inside the comment */
            case ATOM:
                /*
                 * START already consumed the atom's first character, so end
                 * starts just past it and grows with each further character.
                 * NOTE(review): ':' does not end an atom here although START
                 * treats it as a TSPECIAL; left as found - confirm intent.
                 */
                for ( end = la; la < buf.length; ++la ) {
                    if ( buf [ la ] <= ' ' ) {
                        state = START;
                        continue dfa;
                    }
                    switch ( buf [ la ] ) {
                        case '/': case '?': case '=':
                        case '(': case ')': case '<': case '>': case '@':
                        case ',': case ';': case '\\': case '"':
                        case '[': case ']': state = START; continue dfa;
                        default: ++end;
                    }
                }
                state = START;
                break dfa;
        }
    }
    /*
     * If the last character of the input began an atom, START advanced la to
     * the end of the buffer and the loop exited before the ATOM case could
     * run. Finish that one-character atom here; otherwise it would be lost
     * and wrongly reported as an incomplete token.
     */
    if ( state == ATOM ) {
        end = la;
        lastState = ATOM;
        state = START;
    }
    if ( state != START )
        System.err.println( "Warning: incomplete qstring, dtext, or comment");
    /* emit whatever token was still pending when the input ran out */
    if ( end >= beg )
        if ( lastState != COMMENT )
            v.add(new JarX( lastState, new String( buf, beg, end-beg)));
    return v.toArray( new JarX [ v.size() ]);
}
/**Subclass of JarX containing the code needed to build jars. This class
* is not needed for extracting and this class
* file does not need to be included in a self-extracting jar.
*/
public static class Build extends JarX {
/**Entry point for building a jar.
 * Names of all files to be put in the jar (except the manifest itself)
 * are taken from the manifest. With any other number of arguments a usage
 * message is printed and the program exits with status 1.
 *@param args two command line arguments: 1) the name of the jar file
 * to create; 2) the name of the manifest file.
 *@throws Exception if anything goes wrong, punt
 */
public static void main( String[] args) throws Exception {
    if ( 2 == args.length ) {
        new Build().build( args[0], args[1]);
        return;
    }
    System.err.println( "usage: JarX.Build jarfile manifest");
    System.exit( 1);
}
/**Names of files to include, in order of appearance in the manifest.
* Appended to by {@link #store(String,Attributes) store}, one name per call.
*/
ArrayList<String> names = new ArrayList<String>();
/**Attribute sections of those files, appended to by
* {@link #store(String,Attributes) store} together with the name, so the
* entry at a given index here belongs to the name at the same index in
* {@link #names}.
*/
ArrayList<Attributes> sections = new ArrayList<Attributes>();
/**Method to be used by an application using this class to build a jar.
 * The manifest is read twice: once to collect the file names and their
 * attribute sections, and again to copy it into the jar as the first
 * entry (as lines, in the manifest's own encoding). Each named file is
 * then added in manifest order, as bytes or as text according to how its
 * section is classified. A name ending in / becomes a directory entry with
 * no content; if a name without the trailing / turns out to be a local
 * directory, a message is printed and the program exits with status 1.
 * Every stream opened here is closed again, also when building fails
 * part way through.
 *@param jarFile name of jar file to be created
 *@param manif name of an existing manifest file containing the names
 * of files to include in the jar. File names in the manifest obey zip
 * conventions with the forward slash / as the path operator, which may
 * differ from the local platform convention.
 *@throws Exception if anything doesn't work, punt
 */
public void build( String jarFile, String manif) throws Exception {
    FileOutputStream fos = new FileOutputStream( jarFile);
    ZipOutputStream zos = new ZipOutputStream( fos);
    boolean finished = false;
    try {
        FileInputStream is = new FileInputStream( manif);
        ZipEntry ze;
        File f;
        /* first pass over the manifest: collect names and sections */
        try {
            this.manifest( is);
        }
        finally {
            is.close();
        }
        System.err.print( manifestName + " ");
        /* second pass: copy the manifest itself into the jar */
        is = new FileInputStream( manif);
        try {
            ze = new ZipEntry( manifestName);
            zos.putNextEntry( ze);
            classify( null, true);
            archiveCharset = Charset.forName( manifestCode);
            this.shovelLines( is, zos);
        }
        finally {
            is.close();
        }
        zos.closeEntry();
        String[] n = new String [ names.size() ];
        Attributes[] t = new Attributes[ sections.size() ];
        names.toArray( n);
        sections.toArray( t);
        for ( int i = 0; i < n.length; ++i ) {
            /* the manifest has already been written above */
            if ( n[i].equals( manifestName) )
                continue;
            System.err.print( n[i] + " ");
            ze = new ZipEntry( n[i]);
            /* manifest names use '/'; map to the local separator */
            f = new File( File.separatorChar == '/' ? n[i] :
                n[i].replace( '/', File.separatorChar));
            ze.setTime( f.lastModified());
            zos.putNextEntry( ze);
            if ( ze.isDirectory() ) {
                System.err.println();
            }
            else if ( f.isDirectory() ) {
                System.err.println( "DIRECTORY! add / in manifest.");
                System.exit( 1);
            }
            else {
                is = new FileInputStream( f);
                try {
                    classify( t[i], true);
                    if ( BYTES == treatment )
                        this.shovelBytes( is, zos);
                    else
                        this.shovelText( is, zos);
                }
                finally {
                    is.close();
                }
            }
            zos.closeEntry();
        }
        zos.close();
        finished = true;
    }
    finally {
        if ( ! finished ) {
            /*
             * Something failed and its exception is already on the way out;
             * release the output file without letting a secondary close
             * failure replace that exception. The file stream is closed
             * explicitly in case closing the zip stream failed before
             * reaching it.
             */
            try {
                zos.close();
            }
            catch ( IOException ignored ) { }
            try {
                fos.close();
            }
            catch ( IOException ignored ) { }
        }
    }
}
/**Overridden to
 * save name-to-type mappings in Lists instead of the Map, to
 * preserve the order of names in the manifest.
 * The name and its attributes are appended to the two lists in the same
 * call, so they end up at the same index.
 *@param name the name of a file listed in the manifest
 *@param atts the attributes of that file's manifest section
 */
void store( String name, Attributes atts) {
    this.names.add( name);
    this.sections.add( atts);
}
/**Overridden to apply the archive encoding to the output stream (jar
 * entry), the unpacked encoding to the input stream (local file), and use
 * the RFC2046-required CRLF line separator on the output.
 * Every line read is written followed by CRLF. Neither stream is closed
 * here; the output is flushed before returning.
 *@param is source of input (local file)
 *@param os destination of output (jar entry)
 *@throws IOException for any problem involving I/O
 */
public void
shovelLines( InputStream is, OutputStream os)
throws IOException {
    InputStreamReader decoded =
        new InputStreamReader( is, unpackedCharset.newDecoder());
    BufferedReader lineSource = new BufferedReader( decoded);
    OutputStreamWriter encoded =
        new OutputStreamWriter( os, archiveCharset.newEncoder());
    BufferedWriter lineSink = new BufferedWriter( encoded);
    for ( String line = lineSource.readLine(); null != line;
          line = lineSource.readLine() ) {
        lineSink.write( line);
        lineSink.write( "\r\n");
    }
    lineSink.flush();
    encoded.flush();
    /* progress report: how the entry was treated and transcoded */
    System.err.printf( "as lines (%s)\n", describeTranscoding(decoded, encoded));
}
/**Overridden to apply the archive encoding to the output stream (jar entry)
 * and the unpacked encoding to the input stream (local file).
 * Characters are copied unchanged, with no newline translation. Neither
 * stream is closed here; the output is flushed before returning.
 *@param is source of input (local file)
 *@param os destination of output (jar entry)
 *@throws IOException for any problem involving I/O
 */
public void
shovelChars( InputStream is, OutputStream os)
throws IOException {
    InputStreamReader decoded =
        new InputStreamReader( is, unpackedCharset.newDecoder());
    OutputStreamWriter encoded =
        new OutputStreamWriter( os, archiveCharset.newEncoder());
    char[] chunk = new char [ 1024 ];
    int count;
    while ( -1 != (count = decoded.read( chunk, 0, chunk.length)) )
        encoded.write( chunk, 0, count);
    encoded.flush();
    /* progress report: how the entry was treated and transcoded */
    System.err.printf( "as characters (%s)\n", describeTranscoding(decoded, encoded));
}
/**Read the manifest and build lists of file names and Attributes objects.
 * The stream is decoded using the charset named by {@code manifestCode}
 * and handed, section by section, to
 * {@link #section(BufferedReader) section} until that method reports that
 * nothing more remains.
 * This was originally here because JarX wanted to support Java 1.1, which
 * lacked java.util.jar. The reason it is still here (in Build only) is that
 * the java.util.jar.Manifest implementation doesn't preserve the order of
 * manifest sections, while it is nice to build the jar in the specified
 * order.
 *@param is an input stream already open on the manifest
 *@throws IOException if unable to read the manifest
 */
public void manifest( InputStream is)
throws IOException {
    CharsetDecoder decoder = Charset.forName( manifestCode).newDecoder();
    BufferedReader lines =
        new BufferedReader( new InputStreamReader( is, decoder));
    while ( section( lines) ) {
        /* section() does all the work; just repeat until it reports the end */
    }
}
/**Process one manifest section.
 * Headers are read until an empty line or the end of input. Each must
 * contain <CODE>": "</CODE> after at least one character of name; otherwise
 * a message is printed and the program exits with status 1. The first
 * <CODE>Name</CODE> header (compared ignoring case) names the section, and
 * any repeat of it draws a warning and is ignored; every other header is
 * collected as an attribute.
 * A section with a name is passed to
 * {@link #store(String,Attributes) store}. A section without one is passed
 * to {@code setDefaults}, unless {@code mainAttributes} is already set, in
 * which case a message is printed and the program exits with status 1.
 * A section with no headers at all is skipped.
 *@param r BufferedReader already open on the manifest
 *@return true if there is another section to read, false if the end of the
 * manifest has been reached
 *@throws IOException if the manifest can't be read
 */
public boolean section( BufferedReader r)
throws IOException {
    Attributes attrs = new Attributes();
    String sectionName = null;
    boolean sawHeader = false;
    String line;
    while ( null != (line = header( r)) && 0 != line.length() ) {
        sawHeader = true;
        int colon = line.indexOf( ": ");
        if ( colon < 1 ) {
            System.err.printf( "Malformed line in manifest: %s\n", line);
            System.exit( 1);
        }
        String key = line.substring( 0, colon);
        String val = line.substring( colon + 2);
        if ( ! key.equalsIgnoreCase( "Name") )
            attrs.putValue( key, val);
        else if ( null == sectionName )
            sectionName = val;
        else
            System.err.println(
                "Warning: name attribute repeated within a section, ignored.");
    }
    /* a null header means end of input; an empty one only ends the section */
    boolean more = null != line;
    if ( ! sawHeader )
        return more;
    if ( null != sectionName )
        store( sectionName, attrs);
    else {
        if ( null != mainAttributes ) {
            System.err.println(
                "Main attributes followed by another nameless section");
            System.exit( 1);
        }
        setDefaults( attrs);
    }
    return more;
}
/**Buffer used between calls to {@link #header(BufferedReader) header}.
* It holds the physical line that header read ahead while checking for a
* continuation line but did not consume, or null when there is none
* (initially, and once the end of input has been reached).
*/
String nextManifestLine = null;
/**Return one header line, complete after continuation lines are joined.
 * The header starts with the line left in
 * {@link #nextManifestLine} by the previous call, or with a freshly read
 * line if none was left. Lines are then read ahead for as long as they
 * begin with a space; each such line is appended with that one space
 * removed. The first line that is not a continuation is kept in
 * {@link #nextManifestLine} for the next call.
 * <strong>Note:</strong> The Jar specification says it is "inspired by"
 * RFC822, but the folding rule <strong>differs</strong>. RFC822 allows
 * "linear whitespace" (i.e. space or tab) to start the continuation line,
 * and the LWSP <em>remains in the line</em> (RFC822 lines are only supposed
 * to be folded at places LWSP can appear). A jar manifest line continuation
 * can only begin with a space, and the space is <em>eaten</em>; Java's
 * manifest writer can arbitrarily fold in the middle of anything.
 *@param r BufferedReader to read from
 *@return the line read, or null at end of input
 *@throws IOException if the input cannot be read
 */
public String header( BufferedReader r) throws IOException {
    if ( null == nextManifestLine )
        nextManifestLine = r.readLine();
    String first = nextManifestLine;
    /* created only if at least one continuation line turns up */
    StringBuilder unfolded = null;
    while ( null != (nextManifestLine = r.readLine())
            && nextManifestLine.startsWith( " ") ) {
        if ( null == unfolded )
            unfolded = new StringBuilder( String.valueOf( first));
        unfolded.append( nextManifestLine.substring( 1));
    }
    return null == unfolded ? first : unfolded.toString();
}
}
}