// Code.java example
//
// Explorer
// jasm-master
// Copyright (c) 2011, David J. Pearce (djp@ecs.vuw.ac.nz)
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//    * Redistributions of source code must retain the above copyright
//      notice, this list of conditions and the following disclaimer.
//    * Redistributions in binary form must reproduce the above copyright
//      notice, this list of conditions and the following disclaimer in the
//      documentation and/or other materials provided with the distribution.
//    * Neither the name of the <organization> nor the
//      names of its contributors may be used to endorse or promote products
//      derived from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL DAVID J. PEARCE BE LIABLE FOR ANY
// DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

package jasm.attributes;

import jasm.io.BinaryOutputStream;
import jasm.lang.*;
import jasm.util.*;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.*;

/**
 * This represents the Code attribute from the JVM Specification.
 *
 * @author David J. Pearce
 */
public class Code implements BytecodeAttribute {

	protected ArrayList<Bytecode> bytecodes;
	protected ArrayList<Handler> handlers;
	protected ArrayList<BytecodeAttribute> attributes;
	protected ClassFile.Method method; // enclosing method

	/**
	 * Construct a Code attribute for a given method. The supplied bytecodes and
	 * handlers are copied into fresh lists owned by this object, and the list
	 * of nested attributes starts out empty.
	 *
	 * @param bytecodes
	 *            --- the bytecodes making up the method body
	 * @param handlers
	 *            --- the exception handlers for the method body
	 * @param method
	 *            --- the enclosing method
	 */
	public Code(Collection<Bytecode> bytecodes,
			Collection<Handler> handlers, ClassFile.Method method) {
		this.method = method;
		this.bytecodes = new ArrayList<Bytecode>(bytecodes);
		this.handlers = new ArrayList<Handler>(handlers);
		this.attributes = new ArrayList<BytecodeAttribute>();
	}

	/**
	 * Get the name of this attribute.
	 *
	 * @return the string "Code"
	 */
	public String name() {
		return "Code";
	}

	/**
	 * Get the attributes nested within this Code attribute. The list returned
	 * is the internal list itself, not a copy.
	 *
	 * @return the list of nested attributes
	 */
	public List<BytecodeAttribute> attributes() {
		return this.attributes;
	}

	/**
	 * Find the first nested attribute which is an instance of a given class.
	 *
	 * @param c
	 *            --- the class of attribute to look for
	 * @return the first nested attribute that is an instance of c, or null if
	 *         there is no such attribute
	 */
	public <T extends BytecodeAttribute> T attribute(Class<T> c) {
		for(BytecodeAttribute a : attributes) {
			if(c.isInstance(a)) {
				// Class.cast is a checked cast, which avoids the unchecked
				// warning produced by casting directly to T.
				return c.cast(a);
			}
		}
		return null;
	}

	/**
	 * Determine the maximum number of local variable slots required for
	 * this method. The result is the larger of two figures: the highest slot
	 * touched by a store, load or iinc bytecode (plus one for non-static
	 * methods), and the number of slots occupied by the "this" pointer (if
	 * any) together with the declared parameters.
	 *
	 * @return the number of local variable slots to reserve
	 */
	public int maxLocals() {
		// First, find one past the highest slot touched by any bytecode.
		int highest = 0;
		for(Bytecode code : bytecodes) {
			int top;
			if(code instanceof Bytecode.Store) {
				Bytecode.Store store = (Bytecode.Store) code;
				top = store.slot + ClassFile.slotSize(store.type);
			} else if(code instanceof Bytecode.Load) {
				Bytecode.Load load = (Bytecode.Load) code;
				top = load.slot + ClassFile.slotSize(load.type);
			} else if(code instanceof Bytecode.Iinc) {
				top = ((Bytecode.Iinc) code).slot + 1;
			} else {
				// this bytecode does not name a local variable slot
				continue;
			}
			if(top > highest) {
				highest = top;
			}
		}

		// Second, work out how many slots are needed just to hold the "this"
		// pointer (if appropriate) and the parameters supplied. The bytecodes
		// might not access every parameter, so looking at them alone could
		// produce an underestimate.
		int receiver = method.isStatic() ? 0 : 1;
		int required = receiver;
		for(JvmType param : method.type().parameterTypes()) {
			required += ClassFile.slotSize(param);
		}

		int accessed = highest + receiver;
		return accessed > required ? accessed : required;
	}

	/**
	 * Determine the maximum number of stack slots required for this method.
	 * The bytecodes are scanned once, in order, tracking a running stack
	 * height using each bytecode's stackDiff(). The height assumed on entry to
	 * a branch target is the height recorded at the first branch seen to it,
	 * and the first bytecode of an exception handler is assumed to start with
	 * a height of at least one.
	 *
	 * @return a conservative over approximation of the maximum stack height
	 */
	public int maxStack() {
		// This algorithm computes a conservative over approximation. In
		// theory, we can do better, but there's little need to.

		// Map each label name to the index of its bytecode.
		int idx = 0;
		HashMap<String,Integer> labels = new HashMap<String,Integer>();
		for(Bytecode b : bytecodes) {
			if(b instanceof Bytecode.Label) {
				Bytecode.Label lab = (Bytecode.Label) b;
				labels.put(lab.name, idx);
			}
			idx = idx + 1;
		}

		// Map bytecode indices to the stack height assumed on entry there.
		HashMap<Integer,Integer> starts = new HashMap<Integer,Integer>();
		for(Handler h : handlers) {
			starts.put(labels.get(h.label),1);
		}

		idx = 0;
		int max = 0;
		int current = 0;

		for(Bytecode b : bytecodes) {
			if(starts.containsKey(idx)) {
				// This bytecode is the first of an exception handler or the
				// target of a branch already seen. Handlers begin with the
				// thrown exception object on the stack, hence we must account
				// for this.
				current = Math.max(current,starts.get(idx));
			}

			current = current + b.stackDiff();
			max = Math.max(current,max);

			// The cases below are mutually exclusive, so they form a single
			// else-if chain.
			if(b instanceof Bytecode.Goto) {
				Bytecode.Goto gto = (Bytecode.Goto) b;
				recordTarget(starts, labels.get(gto.label), current);
				// control never falls through an unconditional branch
				current = 0;
			} else if(b instanceof Bytecode.Throw) {
				// control never falls through a throw
				current = 0;
			} else if(b instanceof Bytecode.If) {
				Bytecode.If gto = (Bytecode.If) b;
				recordTarget(starts, labels.get(gto.label), current);
			} else if(b instanceof Bytecode.Switch) {
				Bytecode.Switch gto = (Bytecode.Switch) b;
				for(Pair<Integer,String> c : gto.cases) {
					recordTarget(starts, labels.get(c.second()), current);
				}
				recordTarget(starts, labels.get(gto.defaultLabel), current);
			}

			idx = idx + 1;
		}

		return max;
	}

	/**
	 * Record the stack height on entry to a branch target, unless a height has
	 * already been recorded for that target.
	 *
	 * @param starts
	 *            --- maps bytecode indices to their recorded entry height
	 * @param target
	 *            --- index of the bytecode being branched to
	 * @param height
	 *            --- stack height at the branch
	 */
	private static void recordTarget(Map<Integer,Integer> starts, int target,
			int height) {
		if(!starts.containsKey(target)) {
			starts.put(target, height);
		}
	}

	/**
	 * Get the bytecodes making up this Code attribute. The list returned is
	 * the internal list itself, not a copy.
	 *
	 * @return the list of bytecodes
	 */
	public List<Bytecode> bytecodes() {
		return this.bytecodes;
	}

	/**
	 * Get the exception handlers of this Code attribute. The list returned is
	 * the internal list itself, not a copy.
	 *
	 * @return the list of exception handlers
	 */
	public List<Handler> handlers() {
		return this.handlers;
	}

	/**
	 * Add to the constant pool every item this attribute refers to. That is:
	 * the attribute name itself, the items needed by each bytecode, a class
	 * item for each handler's exception type (except java.lang.Throwable), and
	 * the items needed by each nested attribute.
	 *
	 * @param constantPool
	 *            --- the set of constant pool items to add to
	 */
	public void addPoolItems(Set<Constant.Info> constantPool) {
		Constant.Utf8 attributeName = new Constant.Utf8("Code");
		Constant.addPoolItem(attributeName, constantPool);

		for (Bytecode code : bytecodes()) {
			code.addPoolItems(constantPool);
		}

		for (Handler handler : handlers) {
			if (JvmTypes.isClass("java.lang", "Throwable", handler.exception)) {
				// No class item is required for java.lang.Throwable.
				continue;
			}
			Constant.addPoolItem(Constant.buildClass(handler.exception), constantPool);
		}

		for (BytecodeAttribute nested : attributes) {
			nested.addPoolItems(constantPool);
		}
	}

	/**
	 * An exception handler records where control-flow is directed when an
	 * exception is raised: the range of bytecodes it covers, the label at
	 * which the handler code begins, and the type of exception it catches.
	 *
	 * @author David J. Pearce
	 *
	 */
	public static class Handler {
		/**
		 * Index of the first bytecode covered by the handler.
		 */
		public int start;
		/**
		 * Index one past the last bytecode covered by the handler.
		 */
		public int end;
		/**
		 * Name of the label marking the first bytecode of the handler.
		 */
		public String label;
		/**
		 * Type of exception caught by the handler.
		 */
		public JvmType.Clazz exception;

		/**
		 * Construct a handler.
		 *
		 * @param start
		 *            --- index of first bytecode covered
		 * @param end
		 *            --- one past the index of the last bytecode covered
		 * @param label
		 *            --- name of the label where the handler begins
		 * @param exception
		 *            --- type of exception caught
		 */
		public Handler(int start, int end, String label,
				JvmType.Clazz exception) {
			this.exception = exception;
			this.label = label;
			this.end = end;
			this.start = start;
		}
	}

	/**
	 * Write this Code attribute to a binary stream. This proceeds in four
	 * steps: determine the byte offset of every bytecode and label; translate
	 * the bytecodes into bytes; translate the nested attributes into bytes;
	 * and, finally, write out the attribute header, bytecodes, exception table
	 * and nested attributes.
	 *
	 * @param writer
	 *            --- stream to write the attribute to
	 * @param constantPool
	 *            --- map of constant pool items to their actual pool index
	 * @throws IOException
	 */
	public void write(BinaryOutputStream writer,
			Map<Constant.Info, Integer> constantPool) throws IOException {
		// This method is a little tricky. The basic strategy is to first
		// translate each bytecode into its binary representation. One
		// difficulty here, is that we must defer calculating the targets of
		// branch statements until after this is done, since we can't do the
		// calculation without exact values.

		// === DETERMINE LABEL OFFSETS ===

		HashMap<String, Integer> labelOffsets = new HashMap<String, Integer>();

		// The insnOffsets is used to map the statement index to the
		// corresponding bytecodes. This is used in determining the start and
		// end offsets for the exception handlers

		int[] insnOffsets = new int[bytecodes.size()];

		boolean guestimate = true;

		while(guestimate) {
			guestimate = false;
			// With this loop, we have to iterate until we reach a fixed point
			// regarding the label offsets. The basic issue is that, increasing
			// the size of a branch may result in other branches we've already
			// passed requiring their sizes be increased. This can happen
			// because switch statements adjust their size depending on their
			// offset and include padding appropriately. To resolve this, I
			// simply ensure that once a branch looks like it needs to be long,
			// then it's fixed as being long. This may, in very unusual cases,
			// be sub-optimal, but at least it ensures termination!
			int offset = 0;

			for (int i=0;i!=bytecodes.size();++i) {
				Bytecode b = bytecodes.get(i);
				insnOffsets[i] = offset;
				if (b instanceof Bytecode.Label) {
					Bytecode.Label l = (Bytecode.Label) b;
					if(labelOffsets.containsKey(l.name)) {
						int old = labelOffsets.get(l.name);
						if(old != offset) {
							// This label has moved since the previous pass, so
							// anything computed from it may be stale.
							guestimate=true;
						}
					}

					labelOffsets.put(l.name, offset);
				} else if (b instanceof Bytecode.Branch) {
					Bytecode.Branch br = (Bytecode.Branch) b;
					if(labelOffsets.containsKey(br.label))  {
						int len = br.toBytes(offset, labelOffsets, constantPool).length;
						offset += len;

						if(len > 3 && !br.islong) {
							// Now, this branch looks like it needs to be long,
							// so fix it so it's always long.
							bytecodes.set(i,br.fixLong());
						}
					} else {
						// In this case, we can't determine the offset of the
						// label, since we may not have passed it yet!
						// Therefore, for now, I assume that the bytecode requires 3
						// bytes (which is true, except for goto_w).
						offset += 3;
						guestimate = true;
					}
				} else if (b instanceof Bytecode.Switch) {
					// calculate switch statement size
					offset += ((Bytecode.Switch) b).getSize(offset);
				} else {
					offset += b.toBytes(offset, labelOffsets, constantPool).length;
				}
			}
		}

		// === CREATE BYTECODE BYTES ===

		ByteArrayOutputStream bout = new ByteArrayOutputStream();
		int offset = 0;
		for (Bytecode b : bytecodes) {
			byte[] bs = b.toBytes(offset, labelOffsets, constantPool);
			bout.write(bs);
			offset += bs.length;
		}
		byte[] bytecodebytes = bout.toByteArray();

		// === CREATE ATTRIBUTE BYTES
		bout = new ByteArrayOutputStream();
		BinaryOutputStream attrbout = new BinaryOutputStream(bout);
		for(BytecodeAttribute a : attributes) {
			if(a instanceof BytecodeMapAttribute) {
				// These attributes need the index-to-offset map in order to
				// write actual code offsets.
				BytecodeMapAttribute bap = (BytecodeMapAttribute) a;
				bap.write(insnOffsets, attrbout, constantPool);
			} else {
				a.write(attrbout, constantPool);
			}
		}
		byte[] attrbytes = bout.toByteArray();

		// === WRITE CODE ATTRIBUTE ===

		writer.write_u16(constantPool.get(new Constant.Utf8("Code")));
		// need to figure out exception_table length (8 bytes per handler)
		int exception_table_length = handlers().size() * 8;
		// need to figure out attribute_table length
		int attribute_table_length = attrbytes.length;
		// write attribute length. The extra 12 bytes are for the fixed-size
		// fields written below: max stack (2), max locals (2), code length
		// (4), handler count (2) and attribute count (2).
		writer.write_u32(bytecodebytes.length + exception_table_length + attribute_table_length
				+ 12);
		// now write data
		writer.write_u16(maxStack());
		writer.write_u16(maxLocals());
		writer.write_u32(bytecodebytes.length);
		// write bytecode instructions
		for (int i = 0; i != bytecodebytes.length; ++i) {
			writer.write_u8(bytecodebytes[i]);
		}

		// write exception handlers
		writer.write_u16(handlers().size());
		for (Handler h : handlers()) {
			writer.write_u16(insnOffsets[h.start]);
			// The end index is one past the last bytecode covered. Hence, for
			// a handler covering the final bytecode it equals the number of
			// bytecodes, which has no entry in insnOffsets. In that case, the
			// end offset is the length of the code itself.
			int endOffset = h.end == insnOffsets.length ? bytecodebytes.length
					: insnOffsets[h.end];
			writer.write_u16(endOffset);
			writer.write_u16(labelOffsets.get(h.label));

			if (JvmTypes.isClass("java.lang", "Throwable", h.exception)) {
				// a catch type of zero is written for java.lang.Throwable
				writer.write_u16(0);
			} else {
				writer.write_u16(constantPool.get(Constant
						.buildClass(h.exception)));
			}
		}

		writer.write_u16(attributes.size());
		writer.write(attrbytes);
	}

	/**
	 * The purpose of this method is to validate a candidate list of rewrites.
	 * More specifically, a rewrite is considered to be invalid if the region
	 * of bytecodes it replaces overlaps the range of an exception handler.
	 * This includes the case where the region wholly contains a handler's
	 * range, since such a rewrite crosses both of the handler's boundaries.
	 * Invalid rewrites are automatically removed from the list.
	 *
	 * @param rewrites
	 *            --- list of candidate rewrites, which is modified in place
	 */
	public void validate(List<Rewrite> rewrites) {
		Iterator<Rewrite> iter = rewrites.iterator();
		while(iter.hasNext()) {
			Rewrite rw = iter.next();
			int start = rw.start;
			int end = start + rw.length;
			for(Handler h : handlers) {
				// The handler's end is deliberately treated as inclusive
				// here, which errs on the side of rejecting a rewrite.
				if (h.start < end && h.end >= start) {
					// Not OK
					iter.remove();
					break;
				}
			}
		}
	}

	/**
	 * This method accepts a list of rewrites which should be applied. For
	 * efficiency reasons, several constraints are made on the list:
	 * <ol>
	 * <li>The rewrites are ordered by their start location, such that the
	 * first rewrite has the lowest start location</li>
	 * <li>The rewrites don't overlap. That is, we assume only one rewrite can
	 * be applied to any given region of bytecodes.</li>
	 * </ol>
	 * If the complete set of rewrites cannot be constructed according to these
	 * constraints, then it needs to be split up into several calls to this
	 * method.
	 *
	 * <p>
	 * Each rewrite replaces its region of bytecodes with the replacement
	 * sequence, which may be shorter or longer than the region. The exception
	 * handlers are then updated: a handler enclosing the region has its end
	 * moved, and a handler starting at or after the end of the region is
	 * shifted as a whole. Only the bytecodes and handlers are updated by this
	 * method; the nested attributes are not touched.
	 * </p>
	 *
	 * @param rewrites
	 *            --- rewrites to apply, whose start locations refer to
	 *            positions before any rewrite in the list is applied
	 * @throws RuntimeException
	 *             if the region of a rewrite overlaps the range of an
	 *             exception handler without being enclosed by it. In this
	 *             case, the bytecodes of that rewrite have already been
	 *             replaced.
	 */
	public void apply(List<Rewrite> rewrites) {
		// Accumulated change in position caused by the rewrites applied so far
		int offset = 0;

		for(Rewrite rw : rewrites) {
			int start = rw.start + offset;
			int end = start + rw.length;
			Bytecode[] codes = rw.bytecodes;

			// Replace the region through a view of the list, which works
			// regardless of whether the replacement shrinks or grows it.
			List<Bytecode> region = bytecodes.subList(start, end);
			region.clear();
			region.addAll(Arrays.asList(codes));

			// Number of bytecodes removed (negative if the region grew)
			int diff = rw.length - codes.length;
			offset -= diff;

			// Now, update the handlers appropriately
			for (Handler h : handlers) {
				if (h.start <= start && h.end > start && h.end >= end) {
					// region lies entirely within the handler's range
					h.end -= diff;
				} else if (h.start >= end) {
					// handler lies entirely after the region
					h.start -= diff;
					h.end -= diff;
				} else if (h.end >= start) {
					// Here the handler starts before the end of the region
					// and does not finish before the region starts, yet does
					// not enclose it. No adjustment of the handler can be
					// correct.
					throw new RuntimeException(
							"Attempt to optimise an instruction that partially straddles an exception boundary!");
				}
				// otherwise, handler finishes before the region and is unaffected
			}
		}
	}

	/**
	 * Print a textual listing of the bytecodes, one per line, under a "Code:"
	 * heading. Labels are indented by two spaces and all other bytecodes by
	 * three.
	 *
	 * @param output
	 *            --- writer to print the listing to
	 * @param constantPool
	 *            --- map of constant pool items to their actual pool index
	 *            (not used by this method)
	 */
	public void print(PrintWriter output,
			Map<Constant.Info, Integer> constantPool) {
		output.println("  Code:");

		for (Bytecode code : bytecodes) {
			String indent = (code instanceof Bytecode.Label) ? "  " : "   ";
			output.println(indent + code);
		}
	}

	/**
	 * A rewrite identifies a region of bytecodes, given by a start index and a
	 * length, together with the (potentially smaller) sequence of bytecodes
	 * which is to replace that region.
	 *
	 * @author David J. Pearce
	 *
	 */
	public static class Rewrite {
		/**
		 * Index of the first bytecode in the sequence to be replaced.
		 */
		public final int start;
		/**
		 * Number of bytecodes to replace.
		 */
		public final int length;
		/**
		 * The bytecodes to substitute. This is the array supplied on
		 * construction, not a copy.
		 */
		public final Bytecode[] bytecodes;

		/**
		 * Construct a rewrite.
		 *
		 * @param start
		 *            --- index of first bytecode to replace
		 * @param length
		 *            --- number of bytecodes to replace
		 * @param bytecodes
		 *            --- bytecodes to substitute
		 */
		public Rewrite(int start, int length, Bytecode... bytecodes) {
			this.bytecodes = bytecodes;
			this.length = length;
			this.start = start;
		}
	}

	/**
	 * <p>
	 * An attribute which maps bytecodes to some kind of information. For
	 * example, the Exceptions attribute maps bytecodes to exception handler
	 * regions; likewise, the LineNumbersTable attribute maps bytecodes to
	 * source code line numbers.
	 * </p>
	 *
	 * <p>
	 * Bytecode optimisation may eliminate redundant bytecodes and, hence,
	 * change the relative position of those which remain. Attributes affected
	 * by this must be updated accordingly, and this interface provides the
	 * hook through which they are told about rewrites.
	 * </p>
	 *
	 * <p>
	 * Furthermore, the actual offset of a bytecode in the code block (as
	 * opposed to its index in the block) is not known until the class file is
	 * actually written. Hence, attributes which write bytecode offsets are
	 * given a map from indices to actual code offsets when they are written.
	 * </p>
	 *
	 * @author David J. Pearce
	 *
	 */
	public interface BytecodeMapAttribute extends BytecodeAttribute {

		/**
		 * Apply a list of rewrites to this attribute. For efficiency reasons,
		 * the list must meet the following constraints:
		 * <ol>
		 * <li>The rewrites are ordered by their start location, with the
		 * lowest start location coming first.</li>
		 * <li>The rewrites don't overlap. That is, at most one rewrite applies
		 * to any given region of bytecodes.</li>
		 * </ol>
		 * A set of rewrites which cannot be arranged to meet these constraints
		 * must be split across several calls to this method.
		 *
		 * @param rewrites
		 *            --- the rewrites to apply
		 */
		void apply(List<Rewrite> rewrites);

		/**
		 * Write this attribute to the binary stream.
		 *
		 * @param bytecodeOffsets
		 *            --- maps each bytecode index to its actual offset in the
		 *            code block.
		 * @param writer
		 *            --- stream to write attribute to
		 * @param constantPool
		 *            --- map of constant pool items to their actual pool index
		 * @throws IOException
		 */
		void write(int[] bytecodeOffsets, BinaryOutputStream writer,
				Map<Constant.Info, Integer> constantPool)
				throws IOException;
	}
}