// Copyright (c) 2011, David J. Pearce (djp@ecs.vuw.ac.nz) // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are met: // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // * Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // * Neither the name of the <organization> nor the // names of its contributors may be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE // DISCLAIMED. IN NO EVENT SHALL DAVID J. PEARCE BE LIABLE FOR ANY // DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES // (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; // LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND // ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. package jasm.attributes; import jasm.io.BinaryOutputStream; import jasm.lang.*; import jasm.util.*; import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.PrintWriter; import java.util.*; /** * This represents the Code attribute from the JVM Specification. * * @author David J. Pearce */ public class Code implements BytecodeAttribute { protected ArrayList<Bytecode> bytecodes; protected ArrayList<Handler> handlers; protected ArrayList<BytecodeAttribute> attributes; protected ClassFile.Method method; // enclosing method public Code(Collection<Bytecode> bytecodes, Collection<Handler> handlers, ClassFile.Method method) { this.bytecodes = new ArrayList<Bytecode>(bytecodes); this.handlers = new ArrayList<Handler>(handlers); this.method = method; this.attributes = new ArrayList<BytecodeAttribute>(); } public String name() { return "Code"; } public List<BytecodeAttribute> attributes() { return attributes; } public <T extends BytecodeAttribute> T attribute(Class<T> c) { for(BytecodeAttribute a : attributes) { if(c.isInstance(a)) { return (T) a; } } return null; } /** * Determine the maximum number of local variable slots required for * this method. * * @return */ public int maxLocals() { int max = 0; for(Bytecode b : bytecodes) { if(b instanceof Bytecode.Store) { Bytecode.Store s = (Bytecode.Store) b; max = Math.max(max, s.slot + ClassFile.slotSize(s.type)); } else if(b instanceof Bytecode.Load) { Bytecode.Load l = (Bytecode.Load) b; max = Math.max(max, l.slot + ClassFile.slotSize(l.type)); } else if(b instanceof Bytecode.Iinc) { Bytecode.Iinc l = (Bytecode.Iinc) b; max = Math.max(max, l.slot+1); } } // The reason for the following, is that we must compute the // *minimal* number of slots required. Essentially, this is enough // to hold the "this" pointer (if appropriate) and the parameters // supplied. The issue is that the bytecodes might not actually // access all of the parameters supplied, so just looking at them // might produce an underestimate. int thisp = method.isStatic() ? 0 : 1; int min = thisp; for(JvmType p : method.type().parameterTypes()) { min += ClassFile.slotSize(p); } return Math.max(max+thisp,min); } /** * Determine the maximum number of stack slots required for this method. * * @return */ public int maxStack() { // This algorithm computes a conservative over approximation. In // theory, we can do better, but there's little need to. int idx = 0; HashMap<String,Integer> labels = new HashMap<String,Integer>(); for(Bytecode b : bytecodes) { if(b instanceof Bytecode.Label) { Bytecode.Label lab = (Bytecode.Label) b; labels.put(lab.name, idx); } idx = idx + 1; } HashMap<Integer,Integer> starts = new HashMap<Integer,Integer>(); for(Handler h : handlers) { starts.put(labels.get(h.label),1); } idx = 0; int max = 0; int current = 0; for(Bytecode b : bytecodes) { if(starts.containsKey(idx)) { // This bytecode is the first of an exception handler. Such // handlers begin with the thrown exception object on the stack, // hence we must account for this. current = Math.max(current,starts.get(idx)); } current = current + b.stackDiff(); max = Math.max(current,max); if(b instanceof Bytecode.Goto) { Bytecode.Goto gto = (Bytecode.Goto) b; int offset = labels.get(gto.label); if(!starts.containsKey(offset)) { starts.put(offset, current); } current = 0; } if(b instanceof Bytecode.Throw) { current = 0; } else if(b instanceof Bytecode.If) { Bytecode.If gto = (Bytecode.If) b; int offset = labels.get(gto.label); if(!starts.containsKey(offset)) { starts.put(offset, current); } } else if(b instanceof Bytecode.Switch) { Bytecode.Switch gto = (Bytecode.Switch) b; for(Pair<Integer,String> c : gto.cases) { int offset = labels.get(c.second()); if(!starts.containsKey(offset)) { starts.put(offset, current); } } int offset = labels.get(gto.defaultLabel); if(!starts.containsKey(offset)) { starts.put(offset, current); } } idx = idx + 1; } return max; } public List<Bytecode> bytecodes() { return bytecodes; } public List<Handler> handlers() { return handlers; } public void addPoolItems(Set<Constant.Info> constantPool) { Constant.addPoolItem(new Constant.Utf8("Code"), constantPool); for (Bytecode b : bytecodes()) { b.addPoolItems(constantPool); } for(Handler h : handlers) { if(!JvmTypes.isClass("java.lang","Throwable",h.exception)) { Constant.addPoolItem(Constant.buildClass(h.exception), constantPool); } } for(BytecodeAttribute a : attributes) { a.addPoolItems(constantPool); } } /** * The exception handler class is used to store the necessary information * about where control-flow is directed when an exception is raised. * * @author David J. Pearce * */ public static class Handler { /** * The start index of bytecodes covered by the handler. */ public int start; /** * One past the last index covered by the handler. */ public int end; public String label; public JvmType.Clazz exception; public Handler(int start, int end, String label, JvmType.Clazz exception) { this.start = start; this.end = end; this.label = label; this.exception = exception; } } public void write(BinaryOutputStream writer, Map<Constant.Info, Integer> constantPool) throws IOException { // This method is a little tricky. The basic strategy is to first // translate each bytecode into it's binary representation. One // difficulty here, is that we must defer calculating the targets of // branch statements until after this is done, since we can't do the // calculation without exact values. // === DETERMINE LABEL OFFSETS === HashMap<String, Integer> labelOffsets = new HashMap<String, Integer>(); // The insnOffsets is used to map the statement index to the // corresponding bytecodes. This is used in determining the start and // end offsets for the exception handlers int[] insnOffsets = new int[bytecodes.size()]; boolean guestimate = true; while(guestimate) { guestimate = false; // With this loop, we have to iterate until we reach a fixed point // regarding the label offsets. The basic issue is that, increasing // the size of a branch may result in other branches we've already // passed requiring their sizes be increased. This can happen // because switch statements adjust their size depending on their // offset and include padding appropriately. To resolve this, I // simply ensure that once a branch looks like it needs to be long, // then it's fixed as being long. This may, in very unusual cases, // be sub-optimal, but at least it ensures termination! int offset = 0; for (int i=0;i!=bytecodes.size();++i) { Bytecode b = bytecodes.get(i); insnOffsets[i] = offset; if (b instanceof Bytecode.Label) { Bytecode.Label l = (Bytecode.Label) b; if(labelOffsets.containsKey(l.name)) { int old = labelOffsets.get(l.name); if(old != offset) { guestimate=true; } } labelOffsets.put(l.name, offset); } else if (b instanceof Bytecode.Branch) { Bytecode.Branch br = (Bytecode.Branch) b; if(labelOffsets.containsKey(br.label)) { int len = br.toBytes(offset, labelOffsets, constantPool).length; offset += len; if(len > 3 && !br.islong) { // Now, this branch looks like it needs to be long, // so fix it so it's always long. bytecodes.set(i,br.fixLong()); } } else { // In this case, we can't determine the offset of the // label, since we may not have passed it yet! // Therefore, for now, I assume that the bytecode requires 3 // bytes (which is true, except for goto_w). offset += 3; guestimate = true; } } else if (b instanceof Bytecode.Switch) { // calculate switch statement size offset += ((Bytecode.Switch) b).getSize(offset); } else { offset += b.toBytes(offset, labelOffsets, constantPool).length; } } } // === CREATE BYTECODE BYTES === ByteArrayOutputStream bout = new ByteArrayOutputStream(); int offset = 0; for (Bytecode b : bytecodes) { byte[] bs = b.toBytes(offset, labelOffsets, constantPool); bout.write(bs); offset += bs.length; } byte[] bytecodebytes = bout.toByteArray(); // === CREATE ATTRIBUTE BYTES bout = new ByteArrayOutputStream(); BinaryOutputStream attrbout = new BinaryOutputStream(bout); for(BytecodeAttribute a : attributes) { if(a instanceof BytecodeMapAttribute) { BytecodeMapAttribute bap = (BytecodeMapAttribute) a; bap.write(insnOffsets, attrbout, constantPool); } else { a.write(attrbout, constantPool); } } byte[] attrbytes = bout.toByteArray(); // === WRITE CODE ATTRIBUTE === writer.write_u16(constantPool.get(new Constant.Utf8("Code"))); // need to figure out exception_table length int exception_table_length = handlers().size() * 8; // need to figure out attribute_table length int attribute_table_length = attrbytes.length; // write attribute length writer.write_u32(bytecodebytes.length + exception_table_length + attribute_table_length + 12); // now write data writer.write_u16(maxStack()); writer.write_u16(maxLocals()); writer.write_u32(bytecodebytes.length); // write bytecode instructions for (int i = 0; i != bytecodebytes.length; ++i) { writer.write_u8(bytecodebytes[i]); } // write exception handlers writer.write_u16(handlers().size()); for (Handler h : handlers()) { writer.write_u16(insnOffsets[h.start]); writer.write_u16(insnOffsets[h.end]); writer.write_u16(labelOffsets.get(h.label)); if (JvmTypes.isClass("java.lang", "Throwable", h.exception)) { writer.write_u16(0); } else { writer.write_u16(constantPool.get(Constant .buildClass(h.exception))); } } writer.write_u16(attributes.size()); writer.write(attrbytes); } /** * The purpose of this method is to validate a candidate list of rewrites. * More specifically, a rewrite is considered to be invalid if it crosses an * exception handler boundary. Such rewrites are automatically removed from * the list. * * @param rewrites */ public void validate(List<Rewrite> rewrites) { for(int i=0;i!=rewrites.size();++i) { Rewrite rw = rewrites.get(i); int start = rw.start; int end = start + rw.length; for(Handler h : handlers) { int hstart = h.start; int hend = h.end; if ((hstart < end && hend >= end) || (hstart < start && hend >= start)) { // Not OK rewrites.remove(i); i = i - 1; break; } } } } /** * This method accepts a list of rewrites which should be applied. For * efficiency reasons, several constraints are made on the list: * <ol> * <li>The rewrites are ordered by their start location, such that the * first rewrite has the lowest start location</li> * <li>The rewrites don't overlap. That is, we assume only one rewrite can * be applied to any given region of bytecodes.</li> * </ol> * If the complete set of rewrites cannot be constructed according to these * constraints, then it needs to be split up into several calls to this * method. * * @param rewrites */ public void apply(List<Rewrite> rewrites) { int offset = 0; // Ok, there's a bit of a hack here, since I assume that the rewrites // never increase the number of bytecodes! for(Rewrite rw : rewrites) { int start = rw.start + offset; int pos = start; Bytecode[] codes = rw.bytecodes; for(int i=0;i!=codes.length;++i,++pos) { bytecodes.set(pos,codes[i]); } // Now, remove any remaining slots that were erased. int diff = rw.length - codes.length; for(int i=0;i!=diff;++i) { bytecodes.remove(pos); } offset -= diff; // Now, update the handlers appropriately int end = start + rw.length; for (Handler h : handlers) { int hstart = h.start; int hend = h.end; if (hstart <= start && hend > start) { hend -= diff; } else if (hstart >= end) { hstart -= diff; hend -= diff; } else if ((hstart < end && hend >= end) || (hstart < start && hend >= start)) { throw new RuntimeException( "Attempt to optimise an instruction that partially straddles an exception boundary!"); } h.start = hstart; h.end = hend; } } } public void print(PrintWriter output, Map<Constant.Info, Integer> constantPool) { output.println(" Code:"); //output.println(" stack = " + maxStack() + ", locals = " + maxLocals()); for (Bytecode b : bytecodes) { if (b instanceof Bytecode.Label) { output.println(" " + b); } else { output.println(" " + b); } } } /** * A rewrite defines a sequence of bytecodes that are to be rewritten as a * (potentially) smaller sequence. * * @author David J. Pearce * */ public static class Rewrite { public final int start; // first bytecode in sequence to be replaced public final int length; // number of bytecodes to replace public final Bytecode[] bytecodes; // array of bytecodes to substitute public Rewrite(int start, int length, Bytecode... bytecodes) { this.start = start; this.length = length; this.bytecodes = bytecodes; } } /** * <p> * Maps bytecodes to some kind of attribute. For example, the Exceptions * attribute maps bytecodes to exception handler regions; likewise, the * LineNumbersTable attribute maps bytecodes to source code line numbers. * </p> * * <p> * During bytecode optimisation, the relative position of bytecodes may * change as a result of eliminating redundant bytecodes. In such a case we * need to update those attributes which are affected. This interface * captures those attributes which are affected, and provides a hook to tell * them about rewrites as they happen. * </p> * * <p> * Finally, the actual bytecode offsets in the code block (as opposed to * their index in the block) are not known until the class file is actually * written. Attributes which write bytecode offsets must convert between * indices and actual code offsets. * </p> * * @author David J. Pearce * */ public static interface BytecodeMapAttribute extends BytecodeAttribute { /** * This method accepts a list of rewrites which should be applied. For * efficiency reasons, several constraints are made on the list: * <ol> * <li>The rewrites are ordered by their start location, such that the * first rewrite has the lowest start location</li> * <li>The rewrites don't overlap. That is, we assume only one rewrite * can be applied to any given region of bytecodes.</li> * </ol> * If the complete set of rewrites cannot be constructed according to * these constraints, then it needs to be split up into several calls to * this method. * * @param rewrites */ public void apply(List<Rewrite> rewrites); /** * This method requires the attribute to write itself to the binary * stream. * * @param bytecodeOffsets * --- maps each bytecode index to its actual offset in the * code block. * @param writer * --- stream to write attribute to * @param constantPool * --- map of constant pool items to their actual pool index * @throws IOException */ public void write(int[] bytecodeOffsets, BinaryOutputStream writer, Map<Constant.Info, Integer> constantPool) throws IOException; } }