/* * Copyright 2002-2003 Sun Microsystems, Inc. All Rights Reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 only, as * published by the Free Software Foundation. * * This code is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * version 2 for more details (a copy is included in the LICENSE file that * accompanied this code). * * You should have received a copy of the GNU General Public License version * 2 along with this work; if not, write to the Free Software Foundation, * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. * * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, * CA 95054 USA or visit www.sun.com if you need additional information or * have any questions. * */ /* * Original code for this class taken from the Java HotSpot VM. * Modified for use with the Jakstab project. All modifications * Copyright 2007-2015 Johannes Kinder <jk@jakstab.org> */ package org.jakstab.disasm.x86; import org.jakstab.util.BinaryInputBuffer; import org.jakstab.util.Logger; import org.jakstab.asm.*; import org.jakstab.asm.x86.*; public class InstructionDecoder implements /* imports */ X86Opcodes { private final static Logger logger = Logger.getLogger(InstructionDecoder.class); /* * The three read methods are now changed to call into BinaryInputBuffer * to remove the dependency on byte[]. This could be much nicer, i.e., * integrate all the reading logic with BinaryInputBuffer, but it's * probably not worth the effort. --JK, 8.11.2010 */ static int readByte(BinaryInputBuffer bytesArray, int index) { int ret = 0; if (index < bytesArray.getSize()) { ret = bytesArray.getByteAt(index); ret = ret & 0xff; } else { throw new ArrayIndexOutOfBoundsException("Disassembler requested byte outside of file area: 0x" + Long.toHexString(index)); } return ret; } static int readInt16(BinaryInputBuffer bytesArray, int index) { int ret = 0; ret = readByte(bytesArray, index); ret |= readByte(bytesArray, index+1) << 8; return ret; } static int readInt32(BinaryInputBuffer bytesArray, int index) { int ret = 0; ret = readByte(bytesArray, index); ret |= readByte(bytesArray, index+1) << 8; ret |= readByte(bytesArray, index+2) << 16; ret |= readByte(bytesArray, index+3) << 24; return ret; } // Fixed for every instance protected final int addrMode1; protected final int addrMode2; protected final int addrMode3; protected final String nameTemplate; protected final int operandType1; protected final int operandType2; protected final int operandType3; // Working variables, change with every decoded instruction private int mod; private int regOrOpcode; private int rm; protected int byteIndex; protected int instrStartIndex; protected String name; protected int prefixes; public InstructionDecoder(String name) { this(name, INVALID_ADDRMODE, INVALID_OPERANDTYPE); } public InstructionDecoder(String name, int addrMode1, int operandType1) { this(name, addrMode1, operandType1, INVALID_ADDRMODE, INVALID_OPERANDTYPE); } public InstructionDecoder(String name, int addrMode1, int operandType1, int addrMode2, int operandType2) { this(name, addrMode1, operandType1, addrMode2, operandType2, INVALID_ADDRMODE, INVALID_OPERANDTYPE); } public InstructionDecoder(String name, int addrMode1, int operandType1, int addrMode2, int operandType2, int addrMode3, int operandType3) { this.nameTemplate = name; this.operandType1 = operandType1; this.operandType2 = operandType2; this.operandType3 = operandType3; this.addrMode1 = addrMode1; this.addrMode2 = addrMode2; this.addrMode3 = addrMode3; } public Instruction decode(BinaryInputBuffer bytesArray, int index, int instrStartIndex, int segmentOverride, int prefixes, X86InstructionFactory factory) { this.byteIndex = index; this.instrStartIndex = instrStartIndex; this.prefixes = prefixes; boolean operandSize; //operand-size prefix boolean addrSize; //address-size prefix // segmentoverride is set to 1 in X86Disassembler. Correct for 32bit mode. if ( ( (prefixes & PREFIX_DATA) ^ segmentOverride ) == 1) operandSize = true; // set 32bit operand mode else operandSize = false; if ( ((prefixes & PREFIX_ADR) ^ segmentOverride) == 1) addrSize = true; else addrSize = false; this.name = getCorrectOpcodeName(nameTemplate, prefixes, operandSize, addrSize); //Fetch the mod/reg/rm byte only if it is present. if( isModRMPresent(addrMode1) || isModRMPresent(addrMode2) || isModRMPresent(addrMode3) ) { int ModRM = readByte(bytesArray, byteIndex); byteIndex++; mod = (ModRM >> 6) & 3; regOrOpcode = (ModRM >> 3) & 7; rm = ModRM & 7; } // Call instruction specific code return decodeInstruction(bytesArray, operandSize, addrSize, factory); } public int getCurrentIndex() { return byteIndex; } /** * This is the instruction specific decoder. Gets overridden by the subclasses of * InstructionDecoder. This is the fallback implementation for X86Instruction * Objects. Note that 8bit operands are encoded implicitly in the opcode. * * @param bytesArray The array of bytes representing the binary. * @param operandSize True for 32bit, false for 16bit operands. * @param addrSize True for 32bit addresses, false for 16bit. * @param factory The instruction factory to use. * @return A new object representing the instruction at the current byteIndex. */ protected Instruction decodeInstruction(BinaryInputBuffer bytesArray, boolean operandSize, boolean addrSize, X86InstructionFactory factory) { Operand op1 = getOperand1(bytesArray, operandSize, addrSize); Operand op2 = getOperand2(bytesArray, operandSize, addrSize); Operand op3 = getOperand3(bytesArray, operandSize, addrSize); int size = byteIndex - instrStartIndex; return factory.newGeneralInstruction(name, op1, op2, op3, size, prefixes); } protected Operand getOperand1(BinaryInputBuffer bytesArray, boolean operandSize, boolean addrSize) { if( (addrMode1 != INVALID_ADDRMODE) && (operandType1 != INVALID_OPERANDTYPE) ) return getOperand(bytesArray, addrMode1, operandType1, operandSize, addrSize); else return null; } protected Operand getOperand2(BinaryInputBuffer bytesArray, boolean operandSize, boolean addrSize) { if( (addrMode2 != INVALID_ADDRMODE) && (operandType2 != INVALID_OPERANDTYPE) ) return getOperand(bytesArray, addrMode2, operandType2, operandSize, addrSize); else return null; } protected Operand getOperand3(BinaryInputBuffer bytesArray, boolean operandSize, boolean addrSize) { if( (addrMode3 != INVALID_ADDRMODE) && (operandType3 != INVALID_OPERANDTYPE) ) return getOperand(bytesArray, addrMode3, operandType3, operandSize, addrSize); else return null; } /** * Instantiates the name template for the current instruction by replacing * capital letters with their correct counterparts depending on various * prefixes. * * @param oldName the instruction template string * @param prefixes instruction prefixes * @param operandSize true for 32bit operands, false for 16bit. * @param addrSize true for 32bit addressing, false for 16bit. * @return a new string with the correct AT&T name. */ private String getCorrectOpcodeName(String oldName, int prefixes, boolean operandSize, boolean addrSize) { StringBuffer newName = new StringBuffer(oldName.length()); int index = 0; for(index=0; index<oldName.length(); index++) { switch (oldName.charAt(index)) { case 'C': /* For jcxz/jecxz */ if (addrSize) newName.append('e'); break; case 'N': if ((prefixes & PREFIX_FWAIT) == 0) newName.append('n'); break; case 'S': /* operand size flag */ if (operandSize == true) newName.append('l'); else newName.append('w'); break; default: newName.append(oldName.charAt(index)); } } return newName.toString(); } private DataType getDataType(int operandType, boolean operandSize) { switch (operandType) { case d_mode: return DataType.INT32; case v_mode: if (operandSize) return DataType.INT32; else return DataType.INT16; case w_mode: return DataType.INT16; case b_mode: return DataType.INT8; case q_mode: return DataType.INT64; case dq_mode: return DataType.INT128; case fs_mode: case ss_mode: // SSE: scalar single precision : 32 bit return DataType.FL_SINGLE; case fd_mode: case sd_mode: // SSE: scalar double precision : 64 bit return DataType.FL_DOUBLE; case fe_mode: return DataType.FL_EXT_DOUBLE; case fq_mode: return DataType.FL_QUAD; case ps_mode: // SSE: packed single precision : 128 bit case pd_mode: // SSE: packed double precision : 128 bit return DataType.FL_QUAD; default: // This should only be the case for SSA-instructions and maybe segment-load instructions? logger.error("Unknown data type for operand type: " + operandType + "!"); throw new RuntimeException(); //return DataType.UNKNOWN; } } /** * Get correct Operand object from address type and operand type * * @param bytesArray the code array * @param addrMode addressing mode constant * @param operandType operand type constant * @param operandSize true for 32bit, false for 16bit operands * @param addrSize true for 32bit, false for 16bit addresses * @return a new operand object */ private Operand getOperand(BinaryInputBuffer bytesArray, int addrMode, int operandType, boolean operandSize, boolean addrSize) { Operand op = null; X86SegmentRegister segReg = getSegmentRegisterFromPrefix(prefixes); switch(addrMode) { case ADDR_E: case ADDR_W: //SSE: ModR/M byte specifies either 128 bit XMM register or memory case ADDR_Q: //SSE: ModR/M byte specifies either 128 bit MMX register or memory //X86SegmentRegister segReg = getSegmentRegisterFromPrefix(prefixes); if (mod == 3) { //Register operand, no SIB follows if (addrMode == ADDR_E) { switch (operandType) { case b_mode: op = X86Registers.getRegister8(rm); break; case w_mode: op = X86Registers.getRegister16(rm); break; case v_mode: if (operandSize == true) //Operand size prefix is present op = X86Registers.getRegister32(rm); else op = X86Registers.getRegister16(rm); break; case p_mode: X86Register reg; if (operandSize == true) //Operand size prefix is present reg = X86Registers.getRegister32(rm); else reg = X86Registers.getRegister16(rm); op = new X86MemoryOperand(getDataType(operandType, operandSize), segReg, reg, null, 0); break; case d_mode: op = X86Registers.getRegister32(rm); break; default: break; } } else if (addrMode == ADDR_W) { op = X86XMMRegisters.getRegister(rm); } else if (addrMode == ADDR_Q) { op = X86MMXRegisters.getRegister(rm); } } else { //mod != 3 //SIB follows for (rm==4), SIB gives scale, index and base in this case //disp32 is present for (mod==0 && rm==5) || (mod==2) //disp8 is present for (mod==1) //for (rm!=4) base is register at rm. int scale = 0; int index = 0; int base = 0; long disp = 0; if(rm == 4) { int sib = readByte(bytesArray, byteIndex); byteIndex++; scale = (sib >> 6) & 3; index = (sib >> 3) & 7; base = sib & 7; } switch (mod) { case 0: switch(rm) { case 4: if(base == 5) { disp = readInt32(bytesArray, byteIndex); byteIndex += 4; if (index != 4) { op = new X86MemoryOperand(getDataType(operandType, operandSize), segReg, null, X86Registers.getRegister32(index), disp, scale); } else { op = new X86MemoryOperand(getDataType(operandType, operandSize), segReg, null, null, disp, scale); } } else { if (index != 4) { op = new X86MemoryOperand(getDataType(operandType, operandSize), segReg, X86Registers.getRegister32(base), X86Registers.getRegister32(index), 0, scale); } else { op = new X86MemoryOperand(getDataType(operandType, operandSize), segReg, X86Registers.getRegister32(base), null, 0, scale); } } break; case 5: disp = readInt32(bytesArray, byteIndex); byteIndex += 4; //Create an Address object only with displacement op = new X86MemoryOperand(getDataType(operandType, operandSize), segReg, null, null, disp); break; default: base = rm; //Create an Address object only with base op = new X86MemoryOperand(getDataType(operandType, operandSize), segReg, X86Registers.getRegister32(base), null, 0); break; } break; case 1: disp = (byte)readByte(bytesArray, byteIndex); byteIndex++; if (rm !=4) { base = rm; //Address with base and disp only op = new X86MemoryOperand(getDataType(operandType, operandSize), segReg, X86Registers.getRegister32(base), null, disp); } else { if (index != 4) { op = new X86MemoryOperand(getDataType(operandType, operandSize), segReg, X86Registers.getRegister32(base), X86Registers.getRegister32(index), disp, scale); } else { op = new X86MemoryOperand(getDataType(operandType, operandSize), segReg, X86Registers.getRegister32(base), null, disp, scale); } } break; case 2: disp = readInt32(bytesArray, byteIndex); byteIndex += 4; if (rm !=4) { base = rm; //Address with base and disp op = new X86MemoryOperand(getDataType(operandType, operandSize), segReg, X86Registers.getRegister32(base), null, disp); } else if (index != 4) { op = new X86MemoryOperand(getDataType(operandType, operandSize), segReg, X86Registers.getRegister32(base), X86Registers.getRegister32(index), disp, scale); } else { op = new X86MemoryOperand(getDataType(operandType, operandSize), segReg, X86Registers.getRegister32(base), null, disp, scale); } break; } } break; case ADDR_I: switch (operandType) { case b_mode: op = new Immediate(new Byte((byte)readByte(bytesArray, byteIndex)), DataType.UINT8); byteIndex++; break; case w_mode: op = new Immediate(new Short((short)readInt16(bytesArray, byteIndex)), DataType.UINT16); byteIndex += 2; break; case v_mode: if (operandSize == true) { //Operand size prefix is present op = new Immediate(new Integer(readInt32(bytesArray, byteIndex)), DataType.UINT32); byteIndex += 4; } else { op = new Immediate(new Short((short)readInt16(bytesArray, byteIndex)), DataType.UINT16); byteIndex += 2; } break; default: break; } break; case ADDR_REG: //registers switch(operandType) { case EAX: case ECX: case EDX: case EBX: case ESP: case EBP: case ESI: case EDI: if(operandSize == true) { op = X86Registers.getRegister32(operandType - EAX); } else { op = X86Registers.getRegister16(operandType - EAX); } break; case AX: case CX: case DX: case BX: case SP: case BP: case SI: case DI: op = X86Registers.getRegister16(operandType - AX); break; case AL: case CL: case DL: case BL: case AH: case CH: case DH: case BH: op = X86Registers.getRegister8(operandType - AL); break; case ES: //ES, CS, SS, DS, FS, GS case CS: case SS: case DS: case FS: case GS: op = X86SegmentRegisters.getSegmentRegister(operandType - ES); break; } break; case ADDR_DIR: //segment and offset long segment = 0; long offset = 0; switch (operandType) { case p_mode: if (addrSize == true) { offset = readInt32(bytesArray, byteIndex); byteIndex += 4; segment = readInt16(bytesArray, byteIndex); byteIndex += 2; } else { offset = readInt16(bytesArray, byteIndex); byteIndex += 2; segment = readInt16(bytesArray, byteIndex); byteIndex += 2; } op = new X86AbsoluteAddress(segment, offset); //with offset break; case v_mode: if (addrSize == true) { offset = readInt32(bytesArray, byteIndex); byteIndex += 4; } else { offset = readInt16(bytesArray, byteIndex); byteIndex += 2; } op = new X86AbsoluteAddress(offset); //with offset break; default: break; } break; case ADDR_G: switch (operandType) { case b_mode: op = X86Registers.getRegister8(regOrOpcode); break; case w_mode: op = X86Registers.getRegister16(regOrOpcode); break; case d_mode: op = X86Registers.getRegister32(regOrOpcode); break; case v_mode: if (operandSize == true) op = X86Registers.getRegister32(regOrOpcode); else op = X86Registers.getRegister16(regOrOpcode); break; default: break; } break; case ADDR_SEG: op = X86SegmentRegisters.getSegmentRegister(regOrOpcode); break; case ADDR_OFF: int off = 0; if (addrSize == true) { off = readInt32(bytesArray, byteIndex); byteIndex += 4; } else { off = readInt16(bytesArray, byteIndex); byteIndex += 2; } //op = new X86AbsoluteAddress((long)off); // --JK: This is actually a memory operand with constant address used by MOV. // Absolute Addresses are now used only for far calls and far jumps. op = new X86MemoryOperand(getDataType(operandType, operandSize), segReg, off); // JK- Added segReg for mov fs:0, ecx break; case ADDR_J: long disp = 0; //The effective address is Instruction pointer + relative offset switch(operandType) { case b_mode: disp = (byte)readByte(bytesArray, byteIndex); byteIndex++; break; case v_mode: if (operandSize == true) { disp = readInt32(bytesArray, byteIndex); byteIndex += 4; } else { disp = readInt16(bytesArray, byteIndex); byteIndex += 2; } //disp = disp + (byteIndex-instrStartIndex); break; } op = new X86PCRelativeAddress(disp); break; case ADDR_ESDI: op = new X86MemoryOperand(getDataType(operandType, operandSize), X86SegmentRegisters.ES, X86Registers.EDI); break; case ADDR_DSSI: op = new X86MemoryOperand(getDataType(operandType, operandSize), X86SegmentRegisters.DS, X86Registers.ESI); break; case ADDR_R: switch (operandType) { case b_mode: op = X86Registers.getRegister8(mod); break; case w_mode: op = X86Registers.getRegister16(mod); break; case d_mode: op = X86Registers.getRegister32(mod); break; case v_mode: if (operandSize == true) op = X86Registers.getRegister32(mod); else op = X86Registers.getRegister16(mod); break; default: break; } break; case ADDR_FPREG: switch (operandType) { case 0: op = X86FloatRegisters.getRegister(0); break; case 1: op = X86FloatRegisters.getRegister(rm); break; } break; //SSE: reg field of ModR/M byte selects a 128-bit XMM register case ADDR_V: op = X86XMMRegisters.getRegister(regOrOpcode); break; //SSE: reg field of ModR/M byte selects a 64-bit MMX register case ADDR_P: op = X86MMXRegisters.getRegister(regOrOpcode); break; case ADDR_C: op = X86ControlRegisters.getRegister(regOrOpcode); break; case ADDR_RMR: op = X86Registers.getRegister32(rm); break; case ADDR_D: logger.error("Debug registers not supported!"); case INDIR_REG: if (operandType != DX) logger.warn("Operand type for I/O port addressing should be DX"); op = X86Registers.DX; break; default: logger.error("Error decoding operand: Unsupported addressing mode: " + addrMode + "\n Register code: " + regOrOpcode); } if (op == null) throw new RuntimeException("Unable to decode instruction operand for addressing mode " + addrMode + ", operand type " + operandType + ", and operand size " + operandSize); return op; } private X86SegmentRegister getSegmentRegisterFromPrefix(int prefixes) { X86SegmentRegister segRegister = null; if ( (prefixes & PREFIX_CS) != 0) segRegister = X86SegmentRegisters.CS; if ( (prefixes & PREFIX_DS) != 0) segRegister = X86SegmentRegisters.DS; if ( (prefixes & PREFIX_ES) != 0) segRegister = X86SegmentRegisters.ES; if ( (prefixes & PREFIX_FS) != 0) segRegister = X86SegmentRegisters.FS; if ( (prefixes & PREFIX_SS) != 0) segRegister = X86SegmentRegisters.SS; if ( (prefixes & PREFIX_GS) != 0) segRegister = X86SegmentRegisters.GS; return segRegister; } private boolean isModRMPresent(int addrMode) { if( (addrMode == ADDR_E) || (addrMode == ADDR_G) || (addrMode == ADDR_FPREG) || (addrMode == ADDR_Q) || (addrMode == ADDR_W) || (addrMode == ADDR_C) || (addrMode == ADDR_D)) return true; else return false; } }